File Coverage

factor.c

Criterion	Covered	Total	%
statement	900	1071	84.0
branch	759	1214	62.5
condition			n/a
subroutine			n/a
pod			n/a
total	1659	2285	72.6

line	stmt	bran	code
1			#include
2			#include
3			#include
4			#include
5
6			#define FUNC_ipow 1
7			#define FUNC_isqrt 1
8			#define FUNC_gcd_ui 1
9			#define FUNC_is_perfect_square 1
10			#define FUNC_clz 1
11			#define FUNC_log2floor 1
12			#include "ptypes.h"
13			#include "factor.h"
14			#include "sieve.h"
15			#include "util.h"
16			#include "sort.h"
17			#include "mulmod.h"
18			#include "cache.h"
19			#include "primality.h"
20			#include "lucas_seq.h"
21			#include "montmath.h"
22			static int holf32(uint32_t n, UV *factors, uint32_t rounds);
23
24			/*
25			* You need to remember to use UV for unsigned and IV for signed types that
26			* are large enough to hold our data.
27			* If you use int, that's 32-bit on LP64 and LLP64 machines. You lose.
28			* If you use long, that's 32-bit on LLP64 machines. You lose.
29			* If you use long long, you may be too large which isn't so bad, but some
30			* compilers may not understand the type at all.
31			* perl.h already figured all this out, and provided us with these types which
32			* match the native integer type used inside our Perl, so just use those.
33			*/
34
35			static const unsigned short primes_small[] =
36			{0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
37			101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
38			193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
39			293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
40			409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,
41			521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,
42			641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,
43			757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,
44			881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,
45			1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,
46			1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,
47			1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,
48			1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,
49			1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,
50			1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,
51			1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,
52			1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,
53			1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,
54			1949,1951,1973,1979,1987,1993,1997,1999,2003,2011};
55			#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
56
57			/* For doing trial division loops over the small primes.
58			* Returns either 1 or the new unfactored n.
59			* Puts any factors in place and increments *nfactors.
60			* Assumes n has no factors smaller than primes_small[sp].
61			* Will check primes_small[sp] .. primes_small[endsp] inclusive.
62			* endsp will be clamped to NPRIMES_SMALL-1.
63			*/
64	84600		static uint32_t _trial32(uint32_t n, UV factors, int nfactors, uint32_t sp, uint32_t endsp) {
65			uint32_t f;
66	84600	50	if (sp < 1) sp = 1;
67	84600	50	if (endsp > NPRIMES_SMALL-1) endsp = NPRIMES_SMALL-1;
68	84600	50	if (sp > endsp \|\| n == 1) return n;
		100
69
70			do {
71	534923		f = primes_small[sp];
72	534923	100	if (f*f > n) break;
73	493487	100	while (n % f == 0) {
74	23200		factors[(*nfactors)++] = f;
75	23200		n /= f;
76			}
77	470287	100	} while (++sp <= endsp);
78
79	64921	100	if (f*f > n && n != 1) {
		100
80	63830		factors[(*nfactors)++] = n;
81	63830		n = 1;
82			}
83	64921		return n;
84			}
85	125		static UV _trialuv(UV n, UV factors, int nfactors, uint32_t sp, uint32_t endsp) {
86			uint32_t f;
87	125	50	if (sp < 1) sp = 1;
88	125	50	if (endsp > NPRIMES_SMALL-1) endsp = NPRIMES_SMALL-1;
89	125	50	if (sp > endsp \|\| n == 1) return n;
		50
90
91			do {
92	9171		f = primes_small[sp];
93	9171	100	if (f*f > n) break;
94	9372	100	while (n % f == 0) {
95	213		factors[(*nfactors)++] = f;
96	213		n /= f;
97			}
98	9159	100	} while (++sp <= endsp);
99
100	125	100	if (f*f > n && n != 1) {
		50
101	12		factors[(*nfactors)++] = n;
102	12		n = 1;
103			}
104	125		return n;
105			}
106
107	84627		static int _small_trial_factor(UV n, UV factors, UV newn, uint32_t *lastf)
108			{
109	84627		int nfactors = 0;
110	84627		uint32_t const endsp = 82;
111	84627		uint32_t sp = 4;
112	84627		uint32_t f = 7;
113
114	84627	100	if (n > 1) {
115	137629	100	while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
116	118489	100	while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
117	102295	100	while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
118			}
119
120			/* Trial primes 7 to 421 */
121	84502		n = (n <= 4294967295U) ? _trial32(n, factors, &nfactors, sp, endsp)
122	169129	100	: _trialuv(n, factors, &nfactors, sp, endsp);
123	84627		sp = endsp+1; /* 83 */
124	84627		f = primes_small[sp]; /* 431 */
125
126	84627	100	if (n < 20172017 && ff <= n) { /* Trial division from 431 to 2011 */
		100
127	98		uint32_t const lastsp = NPRIMES_SMALL-1;
128	98		n = _trial32(n, factors, &nfactors, sp, lastsp);
129	98		f = 2017;
130			}
131
132	84627	100	if (f*f > n && n != 1) {
		50
133	0		factors[nfactors++] = n;
134	0		n = 1;
135			}
136	84627	50	if (newn) *newn = n;
137	84627	50	if (lastf) *lastf = f;
138	84627		return nfactors;
139			}
140
141	186		static int _power_factor(UV n, UV *factors)
142			{
143			uint32_t root;
144			int nfactors, i, j, k;
145	186	50	if (n > 3 && (k = powerof_ret(n, &root))) {
		100
146	8		nfactors = factor(root, factors);
147	26	100	for (i = nfactors; i >= 0; i--)
148	56	100	for (j = 0; j < k; j++)
149	38		factors[k*i+j] = factors[i];
150	8		return k * nfactors;
151			}
152	178		factors[0] = n;
153	178		return 1;
154			}
155
156			/* Find one factor of an input n. */
157	126		int factor_one(UV n, UV *factors, bool primality, bool trial)
158			{
159			int nfactors;
160	126	50	if (n < 4) {
161	0		factors[0] = n;
162	0		return (n == 1) ? 0 : 1;
163			}
164			/* TODO: deal with small n */
165	126	50	if (trial) {
166			uint32_t sp, f;
167	0	0	if (!(n&1)) { factors[0] = 2; factors[1] = n >> 1; return 2; }
168	0	0	if (!(n%3)) { factors[0] = 3; factors[1] = n / 3; return 2; }
169	0	0	if (!(n%5)) { factors[0] = 5; factors[1] = n / 5; return 2; }
170	0	0	for (sp = 4; (f = primes_small[sp]) < 2011; sp++) {
171	0	0	if ( (n % f) == 0 )
172	0		{ factors[0] = f; factors[1] = n/f; return 2; }
173			}
174	0	0	if (n < f*f) { factors[0] = n; return 1; }
175			}
176	126	50	if (primality && is_prime(n)) {
		0
177	0		factors[0] = n;
178	0		return 1;
179			}
180			#if 0 /* Simple solution, just fine on x86_64 */
181			nfactors = (n < 1073741824UL) ? holf32(n, factors, 1000000)
182			: pbrent_factor(n, factors, 500000, 1);
183			if (nfactors < 2) croak("factor_one failed on %lu\n", n);
184			#endif
185
186			#if BITS_PER_WORD == 64
187			/* For small semiprimes the fastest solution is HOLF under 32, then
188			* Lehman (no trial) under 38. On random inputs, HOLF is best somewhere
189			* between 28 and 32 bits. Adding Lehman is always slower. */
190	126	100	if (n <= 0xFFFFFFFFU) {
191	41		nfactors = holf32(n, factors, 10000); /* 2400 is enough */
192	41	50	if (nfactors > 1) return nfactors;
193			}
194			#endif
195
196			{
197			/* Adjust the number of rounds based on the number size and speed */
198	85	50	UV const nbits = BITS_PER_WORD - clz(n);
199			#if USE_MONTMATH
200	85		UV const br_rounds = 8000 + (9000 * ((nbits <= 45) ? 0 : (nbits-45)));
201	85		UV const sq_rounds = 200000;
202			#elif MULMODS_ARE_FAST
203			UV const br_rounds = 100 + ( 100 * ((nbits <= 45) ? 0 : (nbits-45)));
204			UV const sq_rounds = 100000;
205			#else
206			UV const br_rounds = (nbits >= 63) ? 120000 : (nbits >= 58) ? 500 : 0;
207			UV const sq_rounds = 200000;
208			#endif
209			/* Almost all inputs are factored here */
210	85	50	if (br_rounds > 0) {
211	85		nfactors = pbrent_factor(n, factors, br_rounds, 1);
212	85	100	if (nfactors > 1) return nfactors;
213			}
214			#if USE_MONTMATH
215	10		nfactors = pbrent_factor(n, factors, 2*br_rounds, 3);
216	10	50	if (nfactors > 1) return nfactors;
217			#endif
218			/* Random 64-bit inputs at this point:
219			* About 3.1% are small enough that we did with HOLF.
220			* montmath: 96.89% pbrent, 0.01% pbrent2
221			* fast: 73.43% pbrent, 21.97% squfof, 1.09% p-1, 0.49% prho, long
222			* slow: 75.34% squfof, 19.47% pbrent, 0.20% p-1, 0.06% prho
223			*/
224			/* SQUFOF with these parameters gets 99.9% of everything left */
225	0	0	if (nbits <= 62) {
226	0		nfactors = squfof_factor(n, factors, sq_rounds);
227	0	0	if (nfactors > 1) return nfactors;
228			}
229			/* At this point we should only have 16+ digit semiprimes. */
230	0		nfactors = pminus1_factor(n, factors, 8000, 120000);
231	0	0	if (nfactors > 1) return nfactors;
232			/* Get the stragglers */
233	0		nfactors = pbrent_factor(n, factors, 500000, 5);
234	0	0	if (nfactors > 1) return nfactors;
235	0		nfactors = prho_factor(n, factors, 180000);
236	0	0	if (nfactors > 1) return nfactors;
237	0		nfactors = cheb_factor(n, factors, 1000000, 0);
238	0	0	if (nfactors > 1) return nfactors;
239	0		croak("factor_one failed on %lu\n", n);
240			}
241			return nfactors;
242			}
243
244			/******************************************************************************/
245			/* Main factor loop */
246			/* */
247			/* Puts factors in factors[] and returns the number found. */
248			/******************************************************************************/
249	84627		int factor(UV n, UV *factors)
250			{
251			UV tofac_stack[MPU_MAX_FACTORS+1];
252	84627		int nsmallfactors, npowerfactors, nfactors, i, j, ntofac = 0;
253			uint32_t f;
254
255	84627		nfactors = _small_trial_factor(n, factors, &n, &f);
256	84627	100	if (n == 1) return nfactors;
257
258			#if BITS_PER_WORD == 64
259			/* For small values less than f^3, use simple factor to split semiprime */
260	296	100	if (n < 100000000 && n < fff) {
		100
261	110	100	if (MR32(n)) factors[nfactors++] = n;
262	45		else nfactors += holf32(n, factors+nfactors, 10000);
263	110		return nfactors;
264			}
265			#endif
266
267	186		nsmallfactors = nfactors;
268
269			/* Perfect powers. Factor root only once. */
270	186		npowerfactors = _power_factor(n, factors+nsmallfactors);
271	186	100	if (npowerfactors > 1) return nsmallfactors + npowerfactors;
272
273			/* loop over each remaining factor, until ntofac == 0 */
274			do {
275	414	100	while ( (n >= f*f) && (!is_def_prime(n)) ) {
		100
		100
		100
276	118		int split_success = factor_one(n, tofac_stack+ntofac, 0, 0) - 1;
277	118	50	if (split_success != 1 \|\| tofac_stack[ntofac] == 1 \|\| tofac_stack[ntofac] == n)
		50
		50
278	0		croak("internal: factor_one failed to factor %"UVuf"\n", n);
279	118		ntofac++; /* Leave one on the to-be-factored stack */
280	118		n = tofac_stack[ntofac]; /* Set n to the other one */
281			}
282			/* n is now prime (or 1), so add to already-factored stack */
283	296	50	if (n != 1) factors[nfactors++] = n;
284			/* Pop the next number off the to-factor stack */
285	296	100	if (ntofac > 0) n = tofac_stack[ntofac-1];
286	296	100	} while (ntofac-- > 0);
287
288			/* Sort the non-small factors */
289	296	100	for (i = nsmallfactors+1; i < nfactors; i++) {
290	118		UV fi = factors[i];
291	242	100	for (j = i; j > 0 && factors[j-1] > fi; j--)
		100
292	124		factors[j] = factors[j-1];
293	118		factors[j] = fi;
294			}
295	178		return nfactors;
296			}
297
298	70939		void factorintp(factored_t *nf, UV n)
299			{
300	70939		UV fac[MPU_MAX_FACTORS], *f = nf->f;
301	70939		uint8_t *e = nf->e;
302			uint32_t nfactors, i, j;
303
304	70939		nf->n = n;
305	70939	100	if (n < 4) {
306	852		f[0] = n;
307	852		e[0] = 1;
308	852		nf->nfactors = 1 - (n==1);
309	852		return;
310			}
311	70087		nfactors = factor(n, fac);
312	70087		f[0] = fac[0];
313	70087		e[0] = 1;
314	164362	100	for (i = 1, j = 0; i < nfactors; i++) {
315	94275	100	if (fac[i] == fac[i-1])
316	30872		e[j]++;
317			else
318	63403		f[++j] = fac[i], e[j] = 1;
319			}
320	70087		nf->nfactors = (uint16_t)j+1;
321			}
322
323	0		void factoredp_validate(const factored_t *nf)
324			{
325	0	0	if (nf->n == 0) {
326	0	0	MPUassert(nf->nfactors == 1, "factored_t n=0 => nfactors = 0");
327	0	0	MPUassert(nf->f[0] == 0 && nf->e[0] == 1, "factored_t n=0 => vecprod = n");
		0
328	0	0	} else if (nf->n == 1) {
329	0	0	MPUassert(nf->nfactors == 0, "factored_t n=1 => nfactors = 0");
330			} else {
331	0		UV lf = 0, N = 1, t;
332			uint32_t i;
333	0	0	MPUassert(nf->nfactors <= MPU_MAX_DFACTORS, "factored_t n has too many factors");
334	0	0	for (i = 0; i < nf->nfactors; i++) {
335	0	0	MPUassert(is_prime(nf->f[i]), "factored_t n has non-prime factor");
336	0	0	MPUassert(lf < nf->f[i], "factored_t factors not in order");
337	0		lf = nf->f[i];
338	0	0	MPUassert(nf->e[i] < BITS_PER_WORD, "factored_t exponent k too high");
339	0	0	MPUassert(nf->e[i] > 0, "factored_t exponent k too low");
340	0	0	if (nf->e[i] == 1) {
341	0		N *= nf->f[i];
342			} else {
343	0		t = ipowsafe(nf->f[i], nf->e[i]);
344	0	0	MPUassert(t != UV_MAX, "factored_t f^e overflows")
345	0		N *= t;
346			}
347			}
348	0	0	MPUassert(N == nf->n, "factored_t n is not equal to f^e * f^e ...");
349			}
350	0		}
351	0		uint32_t factoredp_total_factors(const factored_t *nf) {
352	0		uint32_t i, nfacs = 0;
353	0	0	for (i = 0; i < nf->nfactors; i++)
354	0		nfacs += nf->e[i];
355	0		return nfacs;
356			}
357	5887		bool factoredp_is_square_free(const factored_t *nf) {
358			uint32_t i;
359	19505	100	for (i = 0; i < nf->nfactors; i++)
360	13670	100	if (nf->e[i] > 1)
361	52		break;
362	5887		return i >= nf->nfactors;
363			}
364	15323		signed char factoredp_moebius(const factored_t *nf) {
365			#if 0
366			return !factoredp_is_square_free(nf) ? 0 : nf->nfactors % 2 ? -1 : 1;
367			#else
368			uint32_t i;
369	49182	100	for (i = 0; i < nf->nfactors; i++)
370	33916	100	if (nf->e[i] > 1)
371	57		return 0;
372	15266	100	return nf->nfactors % 2 ? -1 : 1;
373			#endif
374			}
375	0		uint32_t factoredp_linear_factors(UV fac[], const factored_t *nf) {
376	0		uint32_t i, nfac = 0;
377	0	0	for (i = 0; i < nf->nfactors; i++) {
378	0		UV f = nf->f[i], e = nf->e[i];
379	0	0	while (e--)
380	0		fac[nfac++] = f;
381			}
382	0		return nfac;
383			}
384
385			/******************************************************************************/
386
387
388	188		int prime_bigomega(UV n)
389			{
390			UV factors[MPU_MAX_FACTORS+1];
391	188		return factor(n, factors);
392			}
393	195		int prime_omega(UV n)
394			{
395	195	100	if (n <= 1) return (n==0);
396	191		return factorint(n).nfactors;
397			}
398
399
400	579		int trial_factor(UV n, UV *factors, UV f, UV last)
401			{
402	579		int sp, nfactors = 0;
403
404	579	50	if (f < 2) f = 2;
405	579	100	if (last == 0 \|\| last*last > n) last = UV_MAX;
		100
406
407	579	50	if (n < 4 \|\| last < f) {
		100
408	43		factors[0] = n;
409	43		return (n == 1) ? 0 : 1;
410			}
411
412			/* possibly do uint32_t specific code here */
413
414	536	50	if (f < primes_small[NPRIMES_SMALL-1]) {
415	536	50	while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n >>= 1; }
416	536	50	if (3<=last) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
		50
417	536	50	if (5<=last) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
		50
418	3602	100	for (sp = 4; sp < (int)NPRIMES_SMALL; sp++) {
419	3598		f = primes_small[sp];
420	3598	100	if (f*f > n \|\| f > last) break;
		100
421	3264	100	while ( (n%f) == 0 ) {
422	198		factors[nfactors++] = f;
423	198		n /= f;
424			}
425			}
426			}
427			/* Trial division using a mod-30 wheel for larger values */
428	536	100	if (f*f <= n && f <= last) {
		100
429	4		UV m, newlimit, limit = isqrt(n);
430	4	100	if (limit > last) limit = last;
431	4		m = f % 30;
432	14078	100	while (f <= limit) {
433	14074	100	if ( (n%f) == 0 ) {
434			do {
435	2		factors[nfactors++] = f;
436	2		n /= f;
437	2	50	} while ( (n%f) == 0 );
438	2		newlimit = isqrt(n);
439	2	50	if (newlimit < limit) limit = newlimit;
440			}
441	14074		f += wheeladvance30[m];
442	14074		m = nextwheel30[m];
443			}
444			}
445			/* All done! */
446	536	100	if (n != 1)
447	526		factors[nfactors++] = n;
448	536		return nfactors;
449			}
450
451
452	3655		static UV _divisors_from_factors(UV* res, factored_t nf, UV k) {
453			UV count;
454			uint32_t i;
455
456	3655		res[0] = count = 1;
457	12093	100	for (i = 0; i < nf.nfactors; i++) {
458	8438		UV s, scount = count, p = nf.f[i], mult = 1;
459	8438		uint32_t j, e = nf.e[i];
460	20030	100	for (j = 0; j < e; j++) {
461	11592		mult *= p;
462	41443	100	for (s = 0; s < scount; s++) {
463	29851		UV t = res[s] * mult;
464	29851	100	if (t <= k)
465	29517		res[count++] = t;
466			}
467			}
468			}
469	3655		return count;
470			}
471
472	3831		UV* divisor_list(UV n, UV *num_divisors, UV maxd)
473			{
474			factored_t nf;
475			UV ndivisors, *divs;
476			uint32_t i;
477
478	3831	100	if (n == 0 \|\| maxd == 0) {
		100
479	5		*num_divisors = 0;
480	5		return 0;
481	3826	100	} else if (n == 1 \|\| maxd == 1) {
		100
482	171		New(0, divs, 1, UV);
483	171		divs[0] = 1;
484	171		*num_divisors = 1;
485	171		return divs;
486			}
487
488	3655	100	if (maxd > n) maxd = n;
489
490			/* Factor and convert to factor/exponent pair */
491	3655		nf = factorint(n);
492			/* Calculate number of divisors, allocate space, fill with divisors */
493	3655		ndivisors = nf.e[0] + 1;
494	8438	100	for (i = 1; i < nf.nfactors; i++)
495	4783		ndivisors *= (nf.e[i] + 1);
496	3655	50	New(0, divs, ndivisors, UV);
497	3655		ndivisors = _divisors_from_factors(divs, nf, maxd);
498			/* Sort divisors (numeric ascending) */
499	3655		sort_uv_array(divs, ndivisors);
500			/* Return number of divisors and list */
501	3655		*num_divisors = ndivisors;
502	3655		return divs;
503			}
504
505
506			/* The usual method, on OEIS for instance, is:
507			* (p^(k*(e+1))-1) / (p^k-1)
508			* but that overflows quicky. Instead we rearrange as:
509			* 1 + p^k + p^k^2 + ... p^k^e
510			* Return 0 if the result overflowed.
511			*/
512			static const UV sigma_overflow[11] =
513			#if BITS_PER_WORD == 64
514			{UVCONST(3000000000000000000),UVCONST(3000000000),2487240,64260,7026,
515			1622, 566, 256, 139, 85, 57};
516			#else
517			{UVCONST(845404560), 52560, 1548, 252, 84, 41, 24, 16, 12, 10, 8};
518			#endif
519	1589		UV divisor_sum(UV n, UV k)
520			{
521			factored_t nf;
522			UV product;
523			uint32_t i, j;
524
525	1589	50	if (k > 11 \|\| (k > 0 && n >= sigma_overflow[k-1])) return 0;
		100
		50
526			/* divisors(0) = [] divisors(1) = [1] */
527	1589	100	if (n <= 1) return n;
528	1219		nf = factorint(n);
529	1219		product = 1;
530	1219	100	if (k == 0) {
531	2330	100	for (i = 0; i < nf.nfactors; i++)
532	1360		product *= (nf.e[i]+1);
533	249	100	} else if (k == 1) {
534	427	100	for (i = 0; i < nf.nfactors; i++) {
535	270		UV f = nf.f[i];
536	270		uint16_t e = nf.e[i];
537	270		UV pke = f, fmult = 1 + f;
538	372	100	while (e-- > 1) {
539	102		pke *= f;
540	102		fmult += pke;
541			}
542	270		product *= fmult;
543			}
544			} else {
545	232	100	for (i = 0; i < nf.nfactors; i++) {
546	140		UV f = nf.f[i];
547	140		uint16_t e = nf.e[i];
548	140		UV fmult, pke, pk = f;
549	347	100	for (j = 1; j < k; j++) pk *= f;
550	140		fmult = 1 + pk;
551	140		pke = pk;
552	259	100	while (e-- > 1) {
553	119		pke *= pk;
554	119		fmult += pke;
555			}
556	140		product *= fmult;
557			}
558			}
559	1219		return product;
560			}
561
562
563
564			/******************************************************************************/
565			/******************************************************************************/
566			/******************************************************************************/
567
568
569	196		static int found_factor(UV n, UV f, UV* factors)
570			{
571	196		UV g = n/f;
572	196	100	if (f == 1 \|\| f == n) {
		50
573	1		factors[0] = n;
574	1		return 1;
575			}
576	195	100	factors[f >= g] = f;
577	195	100	factors[f < g] = g;
578	195	50	MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
579	195		return 2;
580			}
581	10		static int no_factor(UV n, UV* factors)
582			{
583	10		factors[0] = n;
584	10		return 1;
585			}
586
587			/* Knuth volume 2, algorithm C.
588			* Can't compete with HOLF, SQUFOF, pbrent, etc.
589			*/
590	2		int fermat_factor(UV n, UV *factors, UV rounds)
591			{
592			IV sqn, x, y, r;
593	2	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in fermat_factor");
		50
594	2		sqn = isqrt(n);
595	2		x = 2 * sqn + 1;
596	2		y = 1;
597	2		r = (sqn*sqn) - n;
598
599	10	100	while (r != 0) {
600	8	50	if (rounds-- == 0) return no_factor(n,factors);
601	8		r += x;
602	8		x += 2;
603			do {
604	26		r -= y;
605	26		y += 2;
606	26	100	} while (r > 0);
607			}
608	2		r = (x-y)/2;
609	2		return found_factor(n, r, factors);
610			}
611
612			/* Hart's One Line Factorization. */
613	3		int holf_factor(UV n, UV *factors, UV rounds)
614			{
615			UV i, s, m, f;
616			uint32_t root;
617
618	3	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in holf_factor");
		50
619
620			/* We skip the perfect-square test for s in the loop, so we
621			* will never succeed if n is a perfect square. Test that now. */
622	3	50	if (is_perfect_square_ret(n,&root))
623	0		return found_factor(n, root, factors);
624
625	3	50	if (n <= (UV_MAX >> 6)) { /* Try with premultiplier first */
626	3	50	UV npre = n * ( (n <= (UV_MAX >> 13)) ? 720 :
		0
		0
		0
627			(n <= (UV_MAX >> 11)) ? 480 :
628			(n <= (UV_MAX >> 10)) ? 360 :
629			(n <= (UV_MAX >> 8)) ? 60 : 30 );
630	3		UV ni = npre;
631	6839	50	while (rounds--) {
632	6839		s = 1 + (UV)isqrt(ni);
633	6839		m = (s*s) - ni;
634	6839	100	if (is_perfect_square_ret(m, &root)) {
635	2		f = gcd_ui(n, s - root);
636	2	50	if (f > 1 && f < n)
		50
637	2		return found_factor(n, f, factors);
638			}
639	6837	100	if (ni >= (ni+npre)) break;
640	6836		ni += npre;
641			}
642	1	50	if (rounds == (UV) -1)
643	0		return no_factor(n,factors);
644			}
645
646	1	50	for (i = 1; i <= rounds; i++) {
647	1		s = (UV) sqrt( (double)n * (double)i );
648			/* Assume s^2 isn't a perfect square. We're rapidly losing precision
649			* so we won't be able to accurately detect it anyway. */
650	1		s++; /* s = ceil(sqrt(ni)) /
651	1		m = sqrmod(s, n);
652	1	50	if (is_perfect_square_ret(m, &root)) {
653	1	50	f = gcd_ui( (s>root) ? s-root : root-s, n);
654			/* This should always succeed, but with overflow concerns.... */
655	1		return found_factor(n, f, factors);
656			}
657			}
658	0		return no_factor(n,factors);
659			}
660	86		static int holf32(uint32_t n, UV *factors, uint32_t rounds) {
661			UV npre, ni; /* These should be 64-bit */
662			uint32_t s, m, f;
663
664	86	50	if (n < 3) return no_factor(n,factors);
665	86	50	if (!(n&1)) { factors[0] = 2; factors[1] = n/2; return 2; }
666	86	100	if (is_perfect_square_ret(n,&f)) { factors[0] = factors[1] = f; return 2; }
667
668	85		ni = npre = (UV) n * ((BITS_PER_WORD == 64) ? 5040 : 1);
669	3883	50	while (rounds--) {
670	3883		s = 1 + isqrt(ni);
671	3883		m = ((UV)s*(UV)s) - ni;
672	3883	100	if (is_perfect_square_ret(m, &f)) {
673	85		f = gcd_ui(n, s - f);
674	85	50	if (f > 1 && f < n)
		50
675	85		return found_factor(n, f, factors);
676			}
677	3798	50	if (ni >= (ni+npre)) break; /* We've overflowed */
678	3798		ni += npre;
679			}
680	0		return no_factor(n,factors);
681			}
682
683
684			#define ABSDIFF(x,y) (x>y) ? x-y : y-x
685			#if USE_MONTMATH
686			/* Pollard Rho with Brent's updates, using Montgomery reduction. */
687	97		int pbrent_factor(UV n, UV *factors, UV rounds, UV a)
688			{
689	97	50	UV const nbits = BITS_PER_WORD - clz(n);
690	97	100	const UV inner = (nbits <= 31) ? 32 : (nbits <= 35) ? 64 : (nbits <= 40) ? 160 : (nbits <= 52) ? 256 : 320;
		100
		100
		100
691			UV f, m, r, rleft, Xi, Xm, Xs;
692	97		int irounds, fails = 6;
693	97		const uint64_t npi = mont_inverse(n), mont1 = mont_get1(n);
694
695	97	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pbrent_factor");
		50
696	97		r = f = 1;
697	97		Xi = Xm = Xs = mont1;
698	97		a = mont_geta(a,n);
699
700	966	100	while (rounds > 0) {
701	956		rleft = (r > rounds) ? rounds : r;
702	956		Xm = Xi;
703			/* Do rleft rounds, inner at a time */
704	7004	100	while (rleft > 0) {
705	6137		irounds = (rleft > (UV)inner) ? inner : rleft;
706	6137		rleft -= irounds;
707	6137		rounds -= irounds;
708	6137		Xs = Xi;
709	6137	100	if (n < (1ULL << 63)) {
710	4274		Xi = mont_mulmod63(Xi,Xi+a,n);
711	4274	100	m = ABSDIFF(Xi,Xm);
712	1143271	100	while (--irounds > 0) {
713	1138997		Xi = mont_mulmod63(Xi,Xi+a,n);
714	1138997	100	f = ABSDIFF(Xi,Xm);
715	1138997		m = mont_mulmod63(m, f, n);
716			}
717	1863	100	} else if (a == mont1) {
718	1503		Xi = mont_mulmod64(Xi,Xi+a,n);
719	1503	100	m = ABSDIFF(Xi,Xm);
720	462316	100	while (--irounds > 0) {
721	460813		Xi = mont_mulmod64(Xi,Xi+a,n);
722	460813	100	f = ABSDIFF(Xi,Xm);
723	460813		m = mont_mulmod64(m, f, n);
724			}
725			} else {
726	360		Xi = addmod(mont_mulmod64(Xi,Xi,n), a, n);
727	360	100	m = ABSDIFF(Xi,Xm);
728	108414	100	while (--irounds > 0) {
729	108054		Xi = addmod(mont_mulmod64(Xi,Xi,n), a, n);
730	108054	100	f = ABSDIFF(Xi,Xm);
731	108054		m = mont_mulmod64(m, f, n);
732			}
733			}
734	6137		f = gcd_ui(m, n);
735	6137	100	if (f != 1)
736	89		break;
737			}
738			/* If f == 1, then we didn't find a factor. Move on. */
739	956	100	if (f == 1) {
740	867		r *= 2;
741	867		continue;
742			}
743	89	100	if (f == n) { /* back up, with safety */
744	2		Xi = Xs;
745			do {
746	306	50	if (n < (1ULL << 63) \|\| a == mont1)
		0
747	306	50	Xi = mont_mulmod(Xi,Xi+a,n);
748			else
749	0	0	Xi = addmod(mont_mulmod(Xi,Xi,n),a,n);
750	306	100	m = ABSDIFF(Xi,Xm);
751	306		f = gcd_ui(m, n);
752	306	100	} while (f == 1 && r-- != 0);
		50
753			}
754	89	50	if (f == 0 \|\| f == n) {
		100
755	2	50	if (fails-- <= 0) break;
756	2		Xi = Xm = mont1;
757	2		a = addmod(a, mont_geta(11,n), n);
758	2		continue;
759			}
760	87		return found_factor(n, f, factors);
761			}
762	10		return no_factor(n,factors);
763			}
764			#else
765			/* Pollard Rho with Brent's updates. */
766			int pbrent_factor(UV n, UV *factors, UV rounds, UV a)
767			{
768			UV f, m, r, Xi, Xm;
769			const UV inner = (n <= 4000000000UL) ? 32 : 160;
770			int fails = 6;
771
772			MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pbrent_factor");
773
774			r = f = Xi = Xm = 1;
775			while (rounds > 0) {
776			UV rleft = (r > rounds) ? rounds : r;
777			UV saveXi = Xi;
778			/* Do rleft rounds, inner at a time */
779			while (rleft > 0) {
780			UV dorounds = (rleft > inner) ? inner : rleft;
781			saveXi = Xi;
782			rleft -= dorounds;
783			rounds -= dorounds;
784			Xi = sqraddmod(Xi, a, n); /* First iteration, no mulmod needed */
785			m = ABSDIFF(Xi,Xm);
786			while (--dorounds > 0) { /* Now do inner-1=63 more iterations */
787			Xi = sqraddmod(Xi, a, n);
788			f = ABSDIFF(Xi,Xm);
789			m = mulmod(m, f, n);
790			}
791			f = gcd_ui(m, n);
792			if (f != 1)
793			break;
794			}
795			/* If f == 1, then we didn't find a factor. Move on. */
796			if (f == 1) {
797			r *= 2;
798			Xm = Xi;
799			continue;
800			}
801			if (f == n) { /* back up, with safety */
802			Xi = saveXi;
803			do {
804			Xi = sqraddmod(Xi, a, n);
805			f = gcd_ui( ABSDIFF(Xi,Xm), n);
806			} while (f == 1 && r-- != 0);
807			}
808			if (f == 0 \|\| f == n) {
809			if (fails-- <= 0) break;
810			Xm = addmod(Xm, 11, n);
811			Xi = Xm;
812			a++;
813			continue;
814			}
815			return found_factor(n, f, factors);
816			}
817			return no_factor(n,factors);
818			}
819			#endif
820
821			/* Pollard's Rho. */
822	2		int prho_factor(UV n, UV *factors, UV rounds)
823			{
824			UV f, i, m, oldU, oldV;
825	2		const UV inner = 64;
826	2		UV U = 7;
827	2		UV V = 7;
828	2		UV a = 1;
829	2		int fails = 3;
830
831	2	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in prho_factor");
		50
832
833	2		rounds = (rounds + inner - 1) / inner;
834
835	2	50	while (rounds-- > 0) {
836	2		m = 1; oldU = U; oldV = V;
837	130	100	for (i = 0; i < inner; i++) {
838	128		U = sqraddmod(U, a, n);
839	128		V = sqraddmod(V, a, n);
840	128		V = sqraddmod(V, a, n);
841	128	100	f = (U > V) ? U-V : V-U;
842	128		m = mulmod(m, f, n);
843			}
844	2		f = gcd_ui(m, n);
845	2	50	if (f == 1)
846	0		continue;
847	2	50	if (f == n) { /* back up to find a factor*/
848	2		U = oldU; V = oldV;
849	2		i = inner;
850			do {
851	6		U = sqraddmod(U, a, n);
852	6		V = sqraddmod(V, a, n);
853	6		V = sqraddmod(V, a, n);
854	6	100	f = gcd_ui( (U > V) ? U-V : V-U, n);
855	6	100	} while (f == 1 && i-- != 0);
		50
856			}
857	2	50	if (f == 0 \|\| f == n) {
		50
858	0	0	if (fails-- <= 0) break;
859	0		U = addmod(U,2,n);
860	0		V = U;
861	0		a += 2;
862	0		continue;
863			}
864	2		return found_factor(n, f, factors);
865			}
866	0		return no_factor(n,factors);
867			}
868
869			/* Pollard's P-1 */
870	8		int pminus1_factor(UV n, UV *factors, UV B1, UV B2)
871			{
872			UV f, k, kmin;
873	8		UV a = 2, q = 2;
874	8		UV savea = 2, saveq = 2;
875	8		UV j = 1;
876	8		UV sqrtB1 = isqrt(B1);
877			#if USE_MONTMATH
878	8		const uint64_t npi = mont_inverse(n), mont1 = mont_get1(n);
879	8		UV ma = mont_geta(a,n);
880			#define PMINUS1_APPLY_POWER ma = mont_powmod(ma, k, n)
881			#define PMINUS1_RECOVER_A a = mont_recover(ma,n)
882			#else
883			#define PMINUS1_APPLY_POWER a = powmod(a, k, n)
884			#define PMINUS1_RECOVER_A
885			#endif
886	8	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pminus1_factor");
		50
887
888	8	100	if (B1 <= primes_small[NPRIMES_SMALL-2]) {
889			UV i;
890	338	100	for (i = 1; primes_small[i] <= B1; i++) {
891	335		q = k = primes_small[i];
892	335	100	if (q <= sqrtB1) {
893	40		k = q*q; kmin = B1/q;
894	92	100	while (k <= kmin) k *= q;
895			}
896	335		PMINUS1_APPLY_POWER;
897	335	100	if ( (j++ % 32) == 0) {
898	9	50	PMINUS1_RECOVER_A;
899	9	50	if (a == 0 \|\| gcd_ui(a-1, n) != 1)
		100
900			break;
901	7		savea = a; saveq = q;
902			}
903			}
904	5	50	PMINUS1_RECOVER_A;
905			} else {
906	128	50	START_DO_FOR_EACH_PRIME(2, B1) {
		50
		100
		100
		100
		100
		50
		50
		50
		50
		50
907	128		q = k = p;
908	128	50	if (q <= sqrtB1) {
909	128		k = q*q; kmin = B1/q;
910	332	100	while (k <= kmin) k *= q;
911			}
912	128		PMINUS1_APPLY_POWER;
913	128	100	if ( (j++ % 32) == 0) {
914	4	50	PMINUS1_RECOVER_A;
915	4	50	if (a == 0 \|\| gcd_ui(a-1, n) != 1)
		100
916			break;
917	1		savea = a; saveq = q;
918			}
919			} END_DO_FOR_EACH_PRIME
920	3	50	PMINUS1_RECOVER_A;
921			}
922	8	50	if (a == 0) return no_factor(n,factors);
923	8		f = gcd_ui(a-1, n);
924
925			/* If we found more than one factor in stage 1, backup and single step */
926	8	100	if (f == n) {
927	5		a = savea;
928	20	50	START_DO_FOR_EACH_PRIME(saveq, B1) {
		100
		100
		100
		100
		100
		50
		50
		50
		50
		50
929	20		k = p; kmin = B1/p;
930	109	100	while (k <= kmin) k *= p;
931	20		a = powmod(a, k, n);
932	20		f = gcd_ui(a-1, n);
933	20		q = p;
934	20	100	if (f != 1)
935	5		break;
936			} END_DO_FOR_EACH_PRIME
937			/* If f == n again, we could do:
938			* for (savea = 3; f == n && savea < 100; savea = next_prime(savea)) {
939			* a = savea;
940			* for (q = 2; q <= B1; q = next_prime(q)) {
941			* ...
942			* }
943			* }
944			* but this could be a huge time sink if B1 is large, so just fail.
945			*/
946			}
947
948			/* STAGE 2 */
949	8	100	if (f == 1 && B2 > B1) {
		100
950	1		UV bm = a;
951	1		UV b = 1;
952			UV bmdiff;
953	1		UV precomp_bm[111] = {0}; /* Enough for B2 = 189M */
954
955			/* calculate (a^q)^2, (a^q)^4, etc. */
956	1		bmdiff = sqrmod(bm, n);
957	1		precomp_bm[0] = bmdiff;
958	20	100	for (j = 1; j < 20; j++) {
959	19		bmdiff = mulmod(bmdiff,bm,n);
960	19		bmdiff = mulmod(bmdiff,bm,n);
961	19		precomp_bm[j] = bmdiff;
962			}
963
964	1		a = powmod(a, q, n);
965	1		j = 1;
966	1281	50	START_DO_FOR_EACH_PRIME( q+1, B2 ) {
		50
		50
		0
		0
		100
		50
		100
		50
		50
		50
967	1280		UV lastq = q;
968			UV qdiff;
969	1280		q = p;
970			/* compute a^q = a^lastq * a^(q-lastq) */
971	1280		qdiff = (q - lastq) / 2 - 1;
972	1280	50	if (qdiff >= 111) {
973	0		bmdiff = powmod(bm, q-lastq, n); /* Big gap */
974			} else {
975	1280		bmdiff = precomp_bm[qdiff];
976	1280	50	if (bmdiff == 0) {
977	0	0	if (precomp_bm[qdiff-1] != 0)
978	0		bmdiff = mulmod(mulmod(precomp_bm[qdiff-1],bm,n),bm,n);
979			else
980	0		bmdiff = powmod(bm, q-lastq, n);
981	0		precomp_bm[qdiff] = bmdiff;
982			}
983			}
984	1280		a = mulmod(a, bmdiff, n);
985	1280	50	if (a == 0) break;
986	1280		b = mulmod(b, a-1, n); /* if b == 0, we found multiple factors */
987	1280	100	if ( (j++ % 64) == 0 ) {
988	20		f = gcd_ui(b, n);
989	20	100	if (f != 1)
990	1		break;
991			}
992			} END_DO_FOR_EACH_PRIME
993	1		f = gcd_ui(b, n);
994			}
995	8		return found_factor(n, f, factors);
996			}
997
998			/* Simple Williams p+1 */
999	6		static void pp1_pow(UV *cX, UV exp, UV n)
1000			{
1001	6		UV X0 = *cX;
1002	6		UV X = *cX;
1003	6		UV Y = mulsubmod(X, X, 2, n);
1004	6	50	UV bit = UVCONST(1) << (clz(exp)-1);
1005	345	100	while (bit) {
1006	339		UV T = mulsubmod(X, Y, X0, n);
1007	339	100	if ( exp & bit ) {
1008	15		X = T;
1009	15		Y = mulsubmod(Y, Y, 2, n);
1010			} else {
1011	324		Y = T;
1012	324		X = mulsubmod(X, X, 2, n);
1013			}
1014	339		bit >>= 1;
1015			}
1016	6		*cX = X;
1017	6		}
1018	2		int pplus1_factor(UV n, UV *factors, UV B1)
1019			{
1020			UV X1, X2, f;
1021	2		UV sqrtB1 = isqrt(B1);
1022	2	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pplus1_factor");
		50
1023
1024	2		X1 = 7 % n;
1025	2		X2 = 11 % n;
1026	2		f = 1;
1027	4	50	START_DO_FOR_EACH_PRIME(2, B1) {
		50
		50
		100
		100
		0
		0
		0
		0
		0
		50
1028	4		UV k = p;
1029	4	50	if (p < sqrtB1) {
1030	4		UV kmin = B1/p;
1031	21	100	while (k <= kmin)
1032	17		k *= p;
1033			}
1034	4		pp1_pow(&X1, k, n);
1035	4	50	if (X1 != 2) {
1036	4		f = gcd_ui( submod(X1, 2, n), n);
1037	4	100	if (f != 1 && f != n) break;
		50
1038			}
1039	2		pp1_pow(&X2, k, n);
1040	2	50	if (X2 != 2) {
1041	0		f = gcd_ui( submod(X2, 2, n), n);
1042	0	0	if (f != 1 && f != n) break;
		0
1043			}
1044			} END_DO_FOR_EACH_PRIME
1045
1046	2		return found_factor(n, f, factors);
1047			}
1048
1049
1050			/* SQUFOF, based on Ben Buhrow's racing version. */
1051			#if 1
1052			/* limit to 62-bit inputs, use 32-bit types, faster */
1053			#define SQUFOF_TYPE uint32_t
1054			#define SQUFOF_MAX (UV_MAX >> 2)
1055			#else
1056			/* All 64-bit inputs possible, though we severely limit multipliers */
1057			#define SQUFOF_TYPE UV
1058			#define SQUFOF_MAX UV_MAX
1059			#endif
1060			typedef struct
1061			{
1062			int valid;
1063			SQUFOF_TYPE P;
1064			SQUFOF_TYPE bn;
1065			SQUFOF_TYPE Qn;
1066			SQUFOF_TYPE Q0;
1067			SQUFOF_TYPE b0;
1068			SQUFOF_TYPE it;
1069			SQUFOF_TYPE imax;
1070			SQUFOF_TYPE mult;
1071			} mult_t;
1072
1073			/* N < 2^63 (or 2^31). Returns 0 or a factor */
1074	3		static UV squfof_unit(UV n, mult_t* mult_save)
1075			{
1076			SQUFOF_TYPE imax,i,Q0,Qn,bn,b0,P,bbn,Ro,S,So,t1,t2;
1077			uint32_t root;
1078
1079	3		P = mult_save->P;
1080	3		bn = mult_save->bn;
1081	3		Qn = mult_save->Qn;
1082	3		Q0 = mult_save->Q0;
1083	3		b0 = mult_save->b0;
1084	3		i = mult_save->it;
1085	3		imax = i + mult_save->imax;
1086
1087			#define SQUARE_SEARCH_ITERATION \
1088			t1 = P; \
1089			P = bn*Qn - P; \
1090			t2 = Qn; \
1091			Qn = Q0 + bn*(t1-P); \
1092			Q0 = t2; \
1093			bn = (b0 + P) / Qn; \
1094			i++;
1095
1096	0		while (1) {
1097	3		int j = 0;
1098	3	50	if (i & 0x1) {
1099	0		SQUARE_SEARCH_ITERATION;
1100			}
1101			/* i is now even */
1102			while (1) {
1103			/* We need to know P, bn, Qn, Q0, iteration count, i from prev */
1104	4	50	if (i >= imax) {
1105			/* save state and try another multiplier. */
1106	0		mult_save->P = P;
1107	0		mult_save->bn = bn;
1108	0		mult_save->Qn = Qn;
1109	0		mult_save->Q0 = Q0;
1110	0		mult_save->it = i;
1111	0		return 0;
1112			}
1113
1114	4		SQUARE_SEARCH_ITERATION;
1115
1116			/* Even iteration. Check for square: Qn = SS /
1117	4	100	if (is_perfect_square_ret(Qn,&root))
1118	3		break;
1119
1120			/* Odd iteration. */
1121	1		SQUARE_SEARCH_ITERATION;
1122			}
1123	3		S = root; /* isqrt(Qn); */
1124	3		mult_save->it = i;
1125
1126			/* Reduce to G0 */
1127	3		Ro = P + S*((b0 - P)/S);
1128	3		So = (n - (UV)Ro*(UV)Ro)/(UV)S;
1129	3		bbn = (b0+Ro)/So;
1130
1131			/* Search for symmetry point */
1132			#define SYMMETRY_POINT_ITERATION \
1133			t1 = Ro; \
1134			Ro = bbn*So - Ro; \
1135			t2 = So; \
1136			So = S + bbn*(t1-Ro); \
1137			S = t2; \
1138			bbn = (b0+Ro)/So; \
1139			if (Ro == t1) break;
1140
1141	3		j = 0;
1142			while (1) {
1143	3	100	SYMMETRY_POINT_ITERATION;
1144	1	50	SYMMETRY_POINT_ITERATION;
1145	0	0	SYMMETRY_POINT_ITERATION;
1146	0	0	SYMMETRY_POINT_ITERATION;
1147	0	0	if (j++ > 2000000) {
1148	0		mult_save->valid = 0;
1149	0		return 0;
1150			}
1151			}
1152
1153	3		t1 = gcd_ui(Ro, n);
1154	3	50	if (t1 > 1)
1155	3		return t1;
1156			}
1157			}
1158
1159			/* Gower and Wagstaff 2008:
1160			* http://www.ams.org/journals/mcom/2008-77-261/S0025-5718-07-02010-8/
1161			* Section 5.3. I've added some with 13,17,19. Sorted by F(). */
1162			static const UV squfof_multipliers[] =
1163			/* { 35711, 357, 3511, 35, 3711, 37, 5711, 57,
1164			311, 3, 511, 5, 711, 7, 11, 1 }; /
1165			{ 35711, 357, 3571113, 35713, 3571117, 3511,
1166			35717, 35, 3571119, 351113,35719, 3571317,
1167			3513, 3711, 37, 5711, 3713, 57,
1168			3517, 5713, 3519, 311, 37*17, 3,
1169			31113, 511, 3719, 313, 5, 51113,
1170			5719, 513, 711, 7, 317, 713,
1171			11, 1 };
1172			#define NSQUFOF_MULT (sizeof(squfof_multipliers)/sizeof(squfof_multipliers[0]))
1173
1174	2		int squfof_factor(UV n, UV *factors, UV rounds)
1175			{
1176			mult_t mult_save[NSQUFOF_MULT];
1177	2		UV i, nn64, sqrtnn64, mult, f64,rounds_done = 0;
1178	2		int mults_racing = NSQUFOF_MULT;
1179
1180			/* Caller should have handled these trivial cases */
1181	2	50	MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor");
		50
1182
1183			/* Too big */
1184	2	50	if (n > SQUFOF_MAX)
1185	0		return no_factor(n,factors);
1186
1187	78	100	for (i = 0; i < NSQUFOF_MULT; i++) {
1188	76		mult_save[i].valid = -1;
1189	76		mult_save[i].it = 0;
1190			}
1191
1192			/* Race each multiplier for a bit (20-20k rounds) */
1193	2	50	while (mults_racing > 0 && rounds_done < rounds) {
		50
1194	3	50	for (i = 0; i < NSQUFOF_MULT && rounds_done < rounds; i++) {
		50
1195	3	50	if (mult_save[i].valid == 0) continue;
1196	3		mult = squfof_multipliers[i];
1197	3		nn64 = n * mult;
1198	3	50	if (mult_save[i].valid == -1) {
1199	3	50	if ((SQUFOF_MAX / mult) < n) {
1200	0		mult_save[i].valid = 0; /* This multiplier would overflow 64-bit */
1201	0		mults_racing--;
1202	0		continue;
1203			}
1204	3		sqrtnn64 = isqrt(nn64);
1205	3		mult_save[i].valid = 1;
1206	3		mult_save[i].Q0 = 1;
1207	3		mult_save[i].b0 = sqrtnn64;
1208	3		mult_save[i].P = sqrtnn64;
1209	3		mult_save[i].Qn = (SQUFOF_TYPE)(nn64 - sqrtnn64 * sqrtnn64);
1210	3	50	if (mult_save[i].Qn == 0)
1211	0		return found_factor(n, sqrtnn64, factors);
1212	3		mult_save[i].bn = (2 * sqrtnn64) / (UV)mult_save[i].Qn;
1213	3		mult_save[i].it = 0;
1214	3		mult_save[i].mult = mult;
1215	3		mult_save[i].imax = (UV) (sqrt(sqrtnn64) / 16);
1216	3	50	if (mult_save[i].imax < 20) mult_save[i].imax = 20;
1217	3	50	if (mult_save[i].imax > rounds) mult_save[i].imax = rounds;
1218			}
1219	3	50	if (mults_racing == 1) /* Do all rounds if only one multiplier left */
1220	0		mult_save[i].imax = (rounds - rounds_done);
1221	3		f64 = squfof_unit(nn64, &mult_save[i]);
1222	3	50	if (f64 > 1) {
1223	3		UV f64red = f64 / gcd_ui(f64, mult);
1224	3	100	if (f64red > 1) {
1225			/* unsigned long totiter = 0;
1226			{int K; for (K = 0; K < NSQUFOF_MULT; K++) totiter += mult_save[K].it; }
1227			printf(" n %lu mult %lu it %lu (%lu)\n",n,mult,totiter,(UV)mult_save[i].it); */
1228	2		return found_factor(n, f64red, factors);
1229			}
1230			/* Found trivial factor. Quit working with this multiplier. */
1231	1		mult_save[i].valid = 0;
1232			}
1233	1	50	if (mult_save[i].valid == 0)
1234	1		mults_racing--;
1235	1		rounds_done += mult_save[i].imax; /* Assume we did all rounds */
1236			}
1237			}
1238	0		return no_factor(n,factors);
1239			}
1240
1241			#define SQR_TAB_SIZE 512
1242			static int sqr_tab_init = 0;
1243			static double sqr_tab[SQR_TAB_SIZE];
1244	0		static void make_sqr_tab(void) {
1245			int i;
1246	0	0	for (i = 0; i < SQR_TAB_SIZE; i++)
1247	0		sqr_tab[i] = sqrt((double)i);
1248	0		sqr_tab_init = 1;
1249	0		}
1250
1251			/* Lehman written and tuned by Warren D. Smith.
1252			* Revised by Ben Buhrow and Dana Jacobsen. */
1253	2		int lehman_factor(UV n, UV *factors, bool do_trial) {
1254	2	50	const double Tune = ((n >> 31) >> 5) ? 3.5 : 5.0;
1255			double x, sqrtn;
1256			UV a,c,kN,kN4,B2;
1257	2		uint32_t b,p,k,r,B,U,Bred,inc,ip=2;
1258
1259	2	50	if (!(n&1)) return found_factor(n, 2, factors);
1260
1261	2		B = Tune * (1+icbrt(n));
1262
1263	2	50	if (do_trial) {
1264	2		uint32_t FirstCut = 0.1 * B;
1265	2	50	if (FirstCut < 84) FirstCut = 84;
1266	2	50	if (FirstCut > 65535) FirstCut = 65535;
1267	10	50	for (ip = 2; ip < NPRIMES_SMALL; ip++) {
1268	10		p = primes_small[ip];
1269	10	50	if (p >= FirstCut)
1270	0		break;
1271	10	100	if (n % p == 0)
1272	2		return found_factor(n, p, factors);
1273			}
1274			}
1275
1276			#if BITS_PER_WORD == 64
1277	0	0	if (n >= UVCONST(8796393022207)) return no_factor(n,factors);
1278			#endif
1279	0		Bred = B / (Tune * Tune * Tune);
1280	0		B2 = B*B;
1281	0		kN = 0;
1282
1283	0	0	if (!sqr_tab_init) make_sqr_tab();
1284	0		sqrtn = sqrt(n);
1285
1286	0	0	for (k = 1; k <= Bred; k++) {
1287	0	0	if (k&1) { inc = 4; r = (k+n) % 4; }
1288	0		else { inc = 2; r = 1; }
1289	0		kN += n;
1290			#if BITS_PER_WORD == 64
1291	0	0	if (kN >= UVCONST(1152921504606846976)) return no_factor(n,factors);
1292			#endif
1293	0		kN4 = kN*4;
1294
1295	0	0	x = (k < SQR_TAB_SIZE) ? sqrtn * sqr_tab[k] : sqrt((double)kN);
1296	0		a = x;
1297	0	0	if ((UV)a * (UV)a == kN)
1298	0		return found_factor(n, gcd_ui(a,n), factors);
1299	0		x *= 2;
1300	0		a = x + 0.9999999665; /* Magic constant */
1301	0		b = a % inc;
1302	0		b = a + (inc+r-b) % inc;
1303	0		c = (UV)b*(UV)b - kN4;
1304	0		U = x + B2/(2*x);
1305	0	0	for (a = b; a <= U; c += inc*(a+a+inc), a += inc) {
1306			/* Check for perfect square */
1307	0	0	if (is_perfect_square_ret(c,&b)) {
1308	0		B2 = gcd_ui(a+b, n);
1309	0		return found_factor(n, B2, factors);
1310			}
1311			}
1312			}
1313	0	0	if (do_trial) {
1314	0	0	if (B > 65535) B = 65535;
1315			/* trial divide from primes[ip] to B. We could:
1316			* 1) use table of 6542 shorts for the primes.
1317			* 2) use a wheel
1318			* 3) let trial_factor handle it
1319			*/
1320	0	0	if (ip >= NPRIMES_SMALL) ip = NPRIMES_SMALL-1;
1321	0		return trial_factor(n, factors, primes_small[ip], B);
1322			}
1323	0		return no_factor(n,factors);
1324			}
1325
1326			/* Chebyshev polynomials of the first kind T_n(x) = V_n(2x,1) / 2. */
1327			/* Basic algorithm from Daniel "Trizen" Șuteu */
1328	3		int cheb_factor(UV n, UV *factors, UV B, UV initx)
1329			{
1330			UV sqrtB, inv, x, f, i;
1331
1332	3	100	if (B == 0) { B = log2floor(n); B = 8BB; }
		50
1333	3	100	if (B > isqrt(n)) B = isqrt(n);
1334	3		sqrtB = isqrt(B);
1335	3		inv = modinverse(2,n); /* multiplying by this will divide by two */
1336	3	50	x = (initx == 0) ? 72 : initx;
1337	3		f = 1;
1338
1339	99	50	START_DO_FOR_EACH_PRIME(2, B) {
		50
		100
		100
		100
		100
		50
		50
		50
		50
		50
1340	99	100	if (p <= sqrtB) {
1341	14		UV lgp = logint(B, p);
1342	14		UV plgp = ipowsafe(p, lgp);
1343	14	50	if (plgp < UV_MAX) {
1344	14		x = mulmod(lucasvmod(addmod(x,x,n), 1, plgp, n), inv, n);
1345			} else {
1346	0	0	for (i = 1; i <= lgp; i++)
1347	0		x = mulmod(lucasvmod(addmod(x,x,n), 1, p, n), inv, n);
1348			}
1349			} else {
1350	85		x = mulmod(lucasvmod(addmod(x,x,n), 1, p, n), inv, n);
1351			}
1352	99	100	f = gcd_ui(x-1, n); if (f > 1) break;
1353			} END_DO_FOR_EACH_PRIME
1354
1355	3	50	if (f > 1 && f < n)
		50
1356	3		return found_factor(n, f, factors);
1357	0		return no_factor(n,factors);
1358			}
1359
1360
1361
1362			static const uint32_t _fr_chunk = 256*1024;
1363
1364			/* Help performance by doing a cube root sieve for small ranges */
1365	6		static bool _fr_full_sieve(UV sqrtn, UV range) /* range = hi-lo */
1366			{
1367	6	50	if (sqrtn < 10000000U) return 1; /* Below 10^14 */
1368	0	0	if (sqrtn < 35000000U) return (range > 900); /* Below 10^15 */
1369	0	0	if (sqrtn < 100000000U) return (range > 1700); /* Below 10^16 */
1370	0	0	if (sqrtn < 350000000U) return (range > 3400); /* Below 10^17 */
1371	0	0	if (sqrtn < 1000000000U) return (range > 5500); /* Below 10^18 */
1372	0	0	if (sqrtn < 3500000000U) return (range > 17000); /* Below 10^19 */
1373	0		return (range > 19000);
1374			}
1375
1376	3		static void _vec_factor(UV lo, UV hi, UV nfactors, UV farray, UV noffset, bool square_free)
1377			{
1378			UV *N, j, n, sqrthi, sievelim;
1379	3		sqrthi = isqrt(hi);
1380	3		n = hi-lo+1;
1381	3	50	New(0, N, hi-lo+1, UV);
1382	3013	100	for (j = 0; j < n; j++) {
1383	3010		N[j] = 1;
1384	3010		nfactors[j] = 0;
1385			}
1386	3	50	sievelim = _fr_full_sieve(sqrthi, hi-lo) ? sqrthi : icbrt(hi);
1387	408	50	START_DO_FOR_EACH_PRIME(2, sievelim) {
		50
		100
		100
		100
		100
		50
		50
		50
		50
		100
1388			UV q, t, A;
1389	405	100	if (square_free == 0) {
1390	6		UV kmin = hi / p;
1391	20	100	for (q = p; q <= kmin; q *= p) {
1392	14		t = lo / q, A = t * q;
1393	14	100	if (A < lo) A += q;
1394	477	100	for (j = A-lo; j < n; j += q) {
1395	463		farray[ j*noffset + nfactors[j]++ ] = p;
1396	463		N[j] *= p;
1397			}
1398			}
1399			} else {
1400	399		q = pp, t = lo / q, A = t q;
1401	399	50	if (A < lo) A += q;
1402	1639	100	for (j = A-lo; j < n; j += q) {
1403	1240		N[j] = 0;
1404	1240		nfactors[j] = 0;
1405			}
1406	399		q = p, t = lo / q, A = t * q;
1407	399	100	if (A < lo) A += q;
1408	6093	100	for (j = A-lo; j < n; j += q) {
1409	5694	100	if (N[j] > 0) {
1410	3121		farray[ j*noffset + nfactors[j]++ ] = p;
1411	3121		N[j] *= p;
1412			}
1413			}
1414			}
1415			} END_DO_FOR_EACH_PRIME
1416
1417	3	50	if (sievelim == sqrthi) {
1418			/* Handle the unsieved results, which are prime */
1419	3013	100	for (j = 0; j < n; j++) {
1420	3010	100	if (N[j] == 1)
1421	316		farray[ j*noffset + nfactors[j]++ ] = j+lo;
1422	2694	100	else if (N[j] > 0 && N[j] != j+lo)
		100
1423	1138		farray[ j*noffset + nfactors[j]++ ] = (j+lo) / N[j];
1424			}
1425			} else {
1426			/* Handle the unsieved results, which are prime or semi-prime */
1427	0	0	for (j = 0; j < n; j++) {
1428	0		UV rem = j+lo;
1429	0	0	if (N[j] > 0 && N[j] != rem) {
		0
1430	0	0	if (N[j] != 1)
1431	0		rem /= N[j];
1432	0	0	if (square_free && is_perfect_square(rem)) {
		0
1433	0		nfactors[j] = 0;
1434			} else {
1435	0		UV* f = farray + j*noffset + nfactors[j];
1436	0		nfactors[j] += factor_one(rem, f, 1, 0);
1437			}
1438			}
1439			}
1440			}
1441	3		Safefree(N);
1442	3		}
1443
1444	15		factor_range_context_t factor_range_init(UV lo, UV hi, bool square_free) {
1445			factor_range_context_t ctx;
1446	15		ctx.lo = lo;
1447	15		ctx.hi = hi;
1448	15		ctx.n = lo-1;
1449	15		ctx.is_square_free = square_free;
1450	15	100	if (hi-lo+1 > 100) { /* Sieve in chunks */
1451	3	100	if (square_free) ctx._noffset = (hi <= 42949672965UL) ? 10 : 15;
		50
1452	1	50	else ctx._noffset = BITS_PER_WORD - clz(hi);
1453	3		ctx._coffset = _fr_chunk;
1454	3		New(0, ctx._nfactors, _fr_chunk, UV);
1455	3	50	New(0, ctx._farray, _fr_chunk * ctx._noffset, UV);
1456			{ /* Prealloc all the sieving primes now. */
1457	3		UV t = isqrt(hi);
1458	3	50	if (!_fr_full_sieve(t, hi-lo)) t = icbrt(hi);
1459	3		get_prime_cache(t, 0);
1460			}
1461			} else { /* factor each number */
1462	12	100	New(0, ctx.factors, square_free ? 15 : 63, UV);
1463	12		ctx._nfactors = 0;
1464	12		ctx._farray = ctx.factors;
1465	12		ctx._noffset = 0;
1466			}
1467	15		return ctx;
1468			}
1469
1470	3259		int factor_range_next(factor_range_context_t *ctx) {
1471			int j, nfactors;
1472			UV n;
1473	3259	50	if (ctx->n >= ctx->hi)
1474	0		return -1;
1475	3259		n = ++(ctx->n);
1476	3259	100	if (ctx->_nfactors) {
1477	3010	100	if (ctx->_coffset >= _fr_chunk) {
1478	3		UV clo = n;
1479	3		UV chi = n + _fr_chunk - 1;
1480	3	50	if (chi > ctx->hi \|\| chi < clo) chi = ctx->hi;
		0
1481	3		_vec_factor(clo, chi, ctx->_nfactors, ctx->_farray, ctx->_noffset, ctx->is_square_free);
1482	3		ctx->_coffset = 0;
1483			}
1484	3010		nfactors = ctx->_nfactors[ctx->_coffset];
1485	3010		ctx->factors = ctx->_farray + ctx->_coffset * ctx->_noffset;
1486	3010		ctx->_coffset++;
1487			} else {
1488	249	100	if (ctx->is_square_free && n >= 49 && (!(n% 4) \|\| !(n% 9) \|\| !(n%25) \|\| !(n%49)))
		100
		100
		100
		100
		100
1489	22		return 0;
1490	227		nfactors = factor(n, ctx->factors);
1491	227	100	if (ctx->is_square_free) {
1492	118	100	for (j = 1; j < nfactors; j++)
1493	58	100	if (ctx->factors[j] == ctx->factors[j-1])
1494	17		break;
1495	77	100	if (j < nfactors) return 0;
1496			}
1497			}
1498	3220		return nfactors;
1499			}
1500
1501	15		void factor_range_destroy(factor_range_context_t *ctx) {
1502	15	50	if (ctx->_farray != 0) Safefree(ctx->_farray);
1503	15	100	if (ctx->_nfactors != 0) Safefree(ctx->_nfactors);
1504	15		ctx->_farray = ctx->_nfactors = ctx->factors = 0;
1505	15		}
1506
1507			/******************************************************************************/
1508			/* Find number of factors for all values in a range */
1509			/******************************************************************************/
1510
1511	4		unsigned char* range_nfactor_sieve(UV lo, UV hi, bool with_multiplicity) {
1512			unsigned char* nf;
1513	4		UV *N, i, range = hi-lo+1, sqrtn = isqrt(hi);
1514
1515	4		Newz(0, nf, range, unsigned char);
1516	4	50	New(0, N, range, UV);
1517
1518			/* We could set to 1 and sieve from 2, or do this initialization */
1519	10224	100	for (i = lo; i <= hi && i >= lo; i++) {
		50
1520	10220		N[i-lo] = 1;
1521	10220	100	if (!(i&1) && i >= 2) {
		50
1522	5112		UV k = i >> 1;
1523	5112		unsigned char nz = 1;
1524	10212	100	while (!(k&1)) { nz++; k >>= 1; }
1525	5112	50	nf[i-lo] = (with_multiplicity) ? nz : 1;
1526	5112		N[i-lo] = UVCONST(1) << nz;
1527			}
1528			}
1529
1530	48	50	START_DO_FOR_EACH_PRIME(3, sqrtn) {
		50
		100
		50
		100
		100
		50
		50
		50
		50
		100
1531	44		UV pk, maxpk = UV_MAX/p; \
1532	13130	100	for (i = P_GT_LO_0(p,p,lo); i < range; i += p)
		100
1533	13086		{ N[i] *= p; nf[i]++; }
1534	114	100	for (pk = pp; pk <= hi; pk = p) {
1535	2816	100	for (i = P_GT_LO_0(pk,pk,lo); i < range; i += pk)
		100
1536	2746	50	{ N[i] *= p; if (with_multiplicity) nf[i]++; }
1537	70	50	if (pk >= maxpk) break; /* Overflow protection */
1538			}
1539			} END_DO_FOR_EACH_PRIME
1540
1541	10224	100	for (i = 0; i < range; i++)
1542	10220	100	if (N[i] < (lo+i))
1543	6821		nf[i]++;
1544	4		Safefree(N);
1545	4	50	if (lo == 0) nf[0] = 1;
1546	4		return nf;
1547			}
1548
1549
1550			/******************************************************************************/
1551			/* DLP */
1552			/******************************************************************************/
1553
1554	26		static UV dlp_trial(UV a, UV g, UV p, UV maxrounds) {
1555			UV k, t;
1556	26	100	if (maxrounds > p) maxrounds = p;
1557
1558			#if USE_MONTMATH
1559	26	100	if (p&1) {
1560	21		const uint64_t npi = mont_inverse(p), mont1 = mont_get1(p);
1561	21		g = mont_geta(g, p);
1562	21		a = mont_geta(a, p);
1563	13934	50	for (t = g, k = 1; k < maxrounds; k++) {
1564	13934	100	if (t == a)
1565	21		return k;
1566	13913	50	t = mont_mulmod(t, g, p);
1567	13913	50	if (t == g) break; /* Stop at cycle */
1568			}
1569			} else
1570			#endif
1571			{
1572	9	50	for (t = g, k = 1; k < maxrounds; k++) {
1573	9	100	if (t == a)
1574	4		return k;
1575	5		t = mulmod(t, g, p);
1576	5	100	if (t == g) break; /* Stop at cycle */
1577			}
1578			}
1579	1		return 0;
1580			}
1581
1582			/******************************************************************************/
1583			/* DLP - Pollard Rho */
1584			/******************************************************************************/
1585
1586			/* Compare with Pomerance paper (dartmouth dtalk4):
1587			* Type I/II/III = our case 1, 0, 2.
1588			* x_i = u, a_i = v, b_i = w
1589			*
1590			* Also see Bai/Brent 2008 for many ideas to speed this up.
1591			* https://maths-people.anu.edu.au/~brent/pd/rpb231.pdf
1592			* E.g. Teske adding-walk, Brent's cycle algo, Teske modified cycle
1593			*/
1594			#define pollard_rho_cycle(u,v,w,p,n,a,g) \
1595			switch (u % 3) { \
1596			case 0: u = mulmod(u,u,p); v = mulmod(v,2,n); w = mulmod(w,2,n); break;\
1597			case 1: u = mulmod(u,a,p); v = addmod(v,1,n); break;\
1598			case 2: u = mulmod(u,g,p); w = addmod(w,1,n); break;\
1599			}
1600
1601			typedef struct prho_state_t {
1602			UV u;
1603			UV v;
1604			UV w;
1605			UV U;
1606			UV V;
1607			UV W;
1608			UV round;
1609			int failed;
1610			int verbose;
1611			} prho_state_t;
1612
1613	7		static UV dlp_prho_uvw(UV a, UV g, UV p, UV n, UV rounds, prho_state_t *s) {
1614	7		UV i, k = 0;
1615	7		UV u=s->u, v=s->v, w=s->w;
1616	7		UV U=s->U, V=s->V, W=s->W;
1617	7		int const verbose = s->verbose;
1618
1619	7	50	if (s->failed) return 0;
1620	7	50	if (s->round + rounds > n) rounds = n - s->round;
1621
1622	41391	100	for (i = 1; i <= rounds; i++) {
1623	41388		pollard_rho_cycle(u,v,w,p,n,a,g); /* xi, ai, bi */
1624	41388		pollard_rho_cycle(U,V,W,p,n,a,g);
1625	41388		pollard_rho_cycle(U,V,W,p,n,a,g); /* x2i, a2i, b2i */
1626	41388	50	if (verbose > 3) printf( "%3"UVuf" %4"UVuf" %3"UVuf" %3"UVuf" %4"UVuf" %3"UVuf" %3"UVuf"\n", i, u, v, w, U, V, W );
1627	41388	100	if (u == U) {
1628			UV r1, r2, G, G2;
1629	4		r1 = submod(v, V, n);
1630	4	50	if (r1 == 0) {
1631	0	0	if (verbose) printf("DLP Rho failure, r=0\n");
1632	0		s->failed = 1;
1633	0		k = 0;
1634	0		break;
1635			}
1636	4		r2 = submod(W, w, n);
1637
1638	4		G = gcd_ui(r1,n);
1639	4		G2 = gcd_ui(G,r2);
1640	4		k = divmod(r2/G2, r1/G2, n/G2);
1641	4	100	if (G > 1) {
1642	1	50	if (powmod(g,k,p) == a) {
1643	0	0	if (verbose > 2) printf(" common GCD %"UVuf"\n", G2);
1644			} else {
1645	1		UV m, l = divmod(r2, r1, n/G);
1646	129	100	for (m = 0; m < G; m++) {
1647	128		k = addmod(l, mulmod(m,(n/G),n), n);
1648	128	50	if (powmod(g,k,p) == a) break;
1649			}
1650	1	50	if (m 2) printf(" GCD %"UVuf", found with m=%"UVuf"\n", G, m);
		0
1651			}
1652			}
1653
1654	4	100	if (powmod(g,k,p) != a) {
1655	1	50	if (verbose > 2) printf("r1 = %"UVuf" r2 = %"UVuf" k = %"UVuf"\n", r1, r2, k);
1656	1	50	if (verbose) printf("Incorrect DLP Rho solution: %"UVuf"\n", k);
1657	1		s->failed = 1;
1658	1		k = 0;
1659			}
1660	4		break;
1661			}
1662			}
1663	7		s->round += i-1;
1664	7	50	if (verbose && k) printf("DLP Rho solution found after %"UVuf" steps\n", s->round + 1);
		0
1665	7		s->u = u; s->v = v; s->w = w; s->U = U; s->V = V; s->W = W;
1666	7		return k;
1667			}
1668
1669			#if 0
1670			static UV dlp_prho(UV a, UV g, UV p, UV n, UV maxrounds) {
1671			#ifdef DEBUG
1672			int const verbose = _XS_get_verbose()
1673			#else
1674			int const verbose = 0;
1675			#endif
1676			prho_state_t s = {1, 0, 0, 1, 0, 0, 0, 0, verbose};
1677			return dlp_prho_uvw(a, g, p, n, maxrounds, &s);
1678			}
1679			#endif
1680
1681
1682			/******************************************************************************/
1683			/* DLP - BSGS */
1684			/******************************************************************************/
1685
1686			typedef struct bsgs_hash_t {
1687			UV M; /* The baby step index */
1688			UV V; /* The powmod value */
1689			struct bsgs_hash_t* next;
1690			} bsgs_hash_t;
1691
1692			/****************************************/
1693			/* Simple and limited pool allocation */
1694			#define BSGS_ENTRIES_PER_PAGE 8000
1695			typedef struct bsgs_page_top_t {
1696			struct bsgs_page_t* first;
1697			bsgs_hash_t** table;
1698			UV size;
1699			int nused;
1700			int npages;
1701			} bsgs_page_top_t;
1702
1703			typedef struct bsgs_page_t {
1704			bsgs_hash_t entries[BSGS_ENTRIES_PER_PAGE];
1705			struct bsgs_page_t* next;
1706			} bsgs_page_t;
1707
1708	6923		static bsgs_hash_t* get_entry(bsgs_page_top_t* top) {
1709	6923	100	if (top->nused == 0 \|\| top->nused >= BSGS_ENTRIES_PER_PAGE) {
		50
1710			bsgs_page_t* newpage;
1711	4		Newz(0, newpage, 1, bsgs_page_t);
1712	4		newpage->next = top->first;
1713	4		top->first = newpage;
1714	4		top->nused = 0;
1715	4		top->npages++;
1716			}
1717	6923		return top->first->entries + top->nused++;
1718			}
1719	4		static void destroy_pages(bsgs_page_top_t* top) {
1720	4		bsgs_page_t* head = top->first;
1721	8	100	while (head != 0) {
1722	4		bsgs_page_t* next = head->next;
1723	4		Safefree(head);
1724	4		head = next;
1725			}
1726	4		top->first = 0;
1727	4		}
1728			/****************************************/
1729
1730	4		static void bsgs_hash_put(bsgs_page_top_t* pagetop, UV v, UV i) {
1731	4		UV idx = v % pagetop->size;
1732	4		bsgs_hash_t** table = pagetop->table;
1733	4		bsgs_hash_t* entry = table[idx];
1734
1735	4	50	while (entry && entry->V != v)
		0
1736	0		entry = entry->next;
1737
1738	4	50	if (!entry) {
1739	4		entry = get_entry(pagetop);
1740	4		entry->M = i;
1741	4		entry->V = v;
1742	4		entry->next = table[idx];
1743	4		table[idx] = entry;
1744			}
1745	4		}
1746
1747	828		static UV bsgs_hash_get(bsgs_page_top_t* pagetop, UV v) {
1748	828		bsgs_hash_t* entry = pagetop->table[v % pagetop->size];
1749	857	100	while (entry && entry->V != v)
		100
1750	29		entry = entry->next;
1751	828	100	return (entry) ? entry->M : 0;
1752			}
1753
1754	6919		static UV bsgs_hash_put_get(bsgs_page_top_t* pagetop, UV v, UV i) {
1755	6919		UV idx = v % pagetop->size;
1756	6919		bsgs_hash_t** table = pagetop->table;
1757	6919		bsgs_hash_t* entry = table[idx];
1758
1759	7092	100	while (entry && entry->V != v)
		50
1760	173		entry = entry->next;
1761
1762	6919	50	if (entry)
1763	0		return entry->M;
1764
1765	6919		entry = get_entry(pagetop);
1766	6919		entry->M = i;
1767	6919		entry->V = v;
1768	6919		entry->next = table[idx];
1769	6919		table[idx] = entry;
1770	6919		return 0;
1771			}
1772
1773	5		static UV dlp_bsgs(UV a, UV g, UV p, UV n, UV maxent, bool race_rho) {
1774			bsgs_page_top_t PAGES;
1775			UV i, m, maxm, hashmap_count;
1776			UV aa, S, gm, T, gs_i, bs_i;
1777	5		UV result = 0;
1778			#ifdef DEBUG
1779			int const verbose = _XS_get_verbose();
1780			#else
1781	5		int const verbose = 0;
1782			#endif
1783	5		prho_state_t rho_state = {1, 0, 0, 1, 0, 0, 0, 0, verbose};
1784
1785	5	50	if (n <= 2) return 0; /* Shouldn't be here with gorder this low */
1786
1787	5	50	if (race_rho) {
1788	5		result = dlp_prho_uvw(a, g, p, n, 10000, &rho_state);
1789	5	100	if (result) {
1790	1	50	if (verbose) printf("rho found solution in BSGS step 0\n");
1791	1		return result;
1792			}
1793			}
1794
1795	4	50	if (a == 0) return 0; /* We don't handle this case */
1796
1797	4		maxm = isqrt(n);
1798	4		m = (maxent > maxm) ? maxm : maxent;
1799
1800	4	50	hashmap_count = (m < 65537) ? 65537 :
1801	0	0	(m > 40000000) ? 40000003 :
1802	0		next_prime(m); /* Ave depth around 2 */
1803
1804			/* Create table. Size: 8hashmap_count bytes. /
1805	4		PAGES.size = hashmap_count;
1806	4		PAGES.first = 0;
1807	4		PAGES.nused = 0;
1808	4		PAGES.npages = 0;
1809	4	50	Newz(0, PAGES.table, hashmap_count, bsgs_hash_t*);
1810
1811	4		aa = mulmod(a,a,p);
1812	4		S = a;
1813	4		gm = powmod(g, m, p);
1814	4		T = gm;
1815	4		gs_i = 0;
1816	4		bs_i = 0;
1817
1818	4		bsgs_hash_put(&PAGES, S, 0); /* First baby step */
1819	4		S = mulmod(S, g, p);
1820			/* Interleaved Baby Step Giant Step */
1821	3462	100	for (i = 1; i <= m; i++) {
1822	3460		gs_i = bsgs_hash_put_get(&PAGES, S, i);
1823	3460	50	if (gs_i) { bs_i = i; break; }
1824	3460		S = mulmod(S, g, p);
1825	3460	100	if (S == aa) { /* We discovered the solution! */
1826	1	50	if (verbose) printf(" dlp bsgs: solution at BS step %"UVuf"\n", i+1);
1827	1		result = i+1;
1828	1		break;
1829			}
1830	3459		bs_i = bsgs_hash_put_get(&PAGES, T, i);
1831	3459	50	if (bs_i) { gs_i = i; break; }
1832	3459		T = mulmod(T, gm, p);
1833	3459	50	if (race_rho && (i % 2048) == 0) {
		100
1834	1		result = dlp_prho_uvw(a, g, p, n, 100000, &rho_state);
1835	1	50	if (result) {
1836	1	50	if (verbose) printf("rho found solution in BSGS step %"UVuf"\n", i);
1837	1		break;
1838			}
1839			}
1840			}
1841
1842	4	100	if (!result) {
1843			/* Extend Giant Step search */
1844	2	50	if (!(gs_i \|\| bs_i)) {
		50
1845	2		UV b = (p+m-1)/m;
1846	2	50	if (m < maxm && b > 8m) b = 8m;
		50
1847	828	50	for (i = m+1; i < b; i++) {
1848	828		bs_i = bsgs_hash_get(&PAGES, T);
1849	828	100	if (bs_i) { gs_i = i; break; }
1850	827		T = mulmod(T, gm, p);
1851	827	50	if (race_rho && (i % 2048) == 0) {
		100
1852	1		result = dlp_prho_uvw(a, g, p, n, 100000, &rho_state);
1853	1	50	if (result) {
1854	1	50	if (verbose) printf("rho found solution in BSGS step %"UVuf"\n", i);
1855	1		break;
1856			}
1857			}
1858			}
1859			}
1860
1861	2	100	if (gs_i \|\| bs_i) {
		50
1862	1		result = submod(mulmod(gs_i, m, p), bs_i, p);
1863			}
1864			}
1865	4	50	if (verbose) printf(" dlp bsgs using %d pages (%.1fMB+%.1fMB) for hash\n", PAGES.npages, ((double)PAGES.npages * sizeof(bsgs_page_t)) / (10241024), ((double)hashmap_count sizeof(bsgs_hash_t)) / (10241024));
1866
1867	4		destroy_pages(&PAGES);
1868	4		Safefree(PAGES.table);
1869	4	50	if (result != 0 && powmod(g,result,p) != a) {
		50
1870	0	0	if (verbose) printf("Incorrect DLP BSGS solution: %"UVuf"\n", result);
1871	0		result = 0;
1872			}
1873	4	50	if (race_rho && result == 0) {
		50
1874	0		result = dlp_prho_uvw(a, g, p, n, 2000000000U, &rho_state);
1875			}
1876	4		return result;
1877			}
1878
1879			/* Find smallest k where a = g^k mod p */
1880			#define DLP_TRIAL_NUM 10000
1881	21		UV znlog_solve(UV a, UV g, UV p, UV n) {
1882			UV k, sqrtn;
1883	21		const int verbose = _XS_get_verbose();
1884
1885	21	50	if (a >= p) a %= p;
1886	21	50	if (g >= p) g %= p;
1887
1888	21	100	if (a == 1 \|\| g == 0 \|\| p <= 2)
		50
		50
1889	1		return 0;
1890
1891	20	50	if (verbose > 1 && n != p-1) printf(" g=%"UVuf" p=%"UVuf", order %"UVuf"\n", g, p, n);
		0
1892
1893			/* printf(" solving znlog(%"UVuf",%"UVuf",%"UVuf") n=%"UVuf"\n", a, g, p, n); */
1894
1895	20	50	if (n == 0 \|\| n <= DLP_TRIAL_NUM) {
		100
1896	15		k = dlp_trial(a, g, p, DLP_TRIAL_NUM);
1897	15	50	if (verbose) printf(" dlp trial 10k %s\n", (k!=0 \|\| p <= DLP_TRIAL_NUM) ? "success" : "failure");
		0
		0
1898	15	50	if (k != 0 \|\| (n > 0 && n <= DLP_TRIAL_NUM)) return k;
		0
		0
1899			}
1900
1901			{ /* Existence checks */
1902	5		UV aorder, gorder = n;
1903	5	50	if (gorder != 0 && powmod(a, gorder, p) != 1) return 0;
		50
1904	5		aorder = znorder(a,p);
1905	5	50	if (aorder == 0 && gorder != 0) return 0;
		0
1906	5	50	if (aorder != 0 && gorder % aorder != 0) return 0;
		50
1907			}
1908
1909			/* This is confusing */
1910	5	50	sqrtn = (n == 0) ? 0 : isqrt(n);
1911	5	50	if (n == 0) n = p-1;
1912
1913			{
1914	5	50	UV maxent = (sqrtn > 0) ? sqrtn+1 : 100000;
1915	5		k = dlp_bsgs(a, g, p, n, maxent/2, /* race rho */ 1);
1916	5	50	if (verbose) printf(" dlp bsgs %"UVuf"k %s\n", maxent/1000, k!=0 ? "success" : "failure");
		0
1917	5	50	if (k != 0) return k;
1918	0	0	if (sqrtn > 0 && sqrtn < maxent) return 0;
		0
1919			}
1920
1921	0	0	if (verbose) printf(" dlp doing exhaustive trial\n");
1922	0		k = dlp_trial(a, g, p, p);
1923	0		return k;
1924			}
1925
1926			/* Silver-Pohlig-Hellman */
1927	6		static UV znlog_ph(UV a, UV g, UV p, UV p1) {
1928			factored_t pf;
1929			UV x, sol[MPU_MAX_DFACTORS], mod[MPU_MAX_DFACTORS];
1930			uint32_t i;
1931
1932	6	50	if (p1 == 0) return 0; /* TODO: Should we plow on with p1=p-1? */
1933	6		pf = factorint(p1);
1934	6	100	if (pf.nfactors == 1)
1935	1		return znlog_solve(a, g, p, p1);
1936	21	100	for (i = 0; i < pf.nfactors; i++) {
1937	16		UV pi = ipow(pf.f[i],pf.e[i]);
1938	16		UV delta = powmod(a,p1/pi,p);
1939	16		UV gamma = powmod(g,p1/pi,p);
1940			/* printf(" solving znlog(%"UVuf",%"UVuf",%"UVuf")\n", delta, gamma, p); */
1941	16		sol[i] = znlog_solve( delta, gamma, p, znorder(gamma,p) );
1942	16		mod[i] = pi;
1943			}
1944	5	50	if (chinese(&x, 0, sol, mod, pf.nfactors) == 1 && powmod(g, x, p) == a)
		50
1945	5		return x;
1946	0		return 0;
1947			}
1948
1949			/* Find smallest k where a = g^k mod p */
1950	22		UV znlog(UV a, UV g, UV p) {
1951			UV k, gorder, aorder;
1952	22		const int verbose = _XS_get_verbose();
1953
1954	22	50	if (a >= p) a %= p;
1955	22	50	if (g >= p) g %= p;
1956
1957	22	100	if (a == 1 \|\| g == 0 \|\| p <= 2)
		50
		50
1958	2		return 0;
1959
1960			/* TODO: We call znorder with the same p many times. We should have a
1961			* method for znorder given {phi,nfactors,fac,exp} */
1962
1963	20		gorder = znorder(g,p);
1964	20	100	if (gorder != 0 && powmod(a, gorder, p) != 1) return 0;
		100
1965			/* TODO: Can these tests every fail? Do we need aorder? */
1966	18		aorder = znorder(a,p);
1967	18	100	if (aorder == 0 && gorder != 0) return 0;
		50
1968	18	100	if (aorder != 0 && gorder % aorder != 0) return 0;
		50
1969
1970			/* TODO: Come up with a better solution for a=0 */
1971	18	100	if (a == 0 \|\| p < DLP_TRIAL_NUM \|\| (gorder > 0 && gorder < DLP_TRIAL_NUM)) {
		100
		50
		50
1972	11	50	if (verbose > 1) printf(" dlp trial znlog(%"UVuf",%"UVuf",%"UVuf")\n",a,g,p);
1973	11		k = dlp_trial(a, g, p, p);
1974	11		return k;
1975			}
1976
1977	7	100	if (!is_prob_prime(gorder)) {
1978	6		k = znlog_ph(a, g, p, gorder);
1979	6	50	if (verbose) printf(" dlp PH %s\n", k!=0 ? "success" : "failure");
		0
1980	6	50	if (k != 0) return k;
1981			}
1982
1983	1		return znlog_solve(a, g, p, gorder);
1984			}
1985
1986
1987			/* Compile with:
1988			* gcc -O3 -fomit-frame-pointer -march=native -Wall -DSTANDALONE -DFACTOR_STANDALONE factor.c util.c primality.c cache.c sieve.c chacha.c csprng.c prime_counts.c prime_count_cache.c lmo.c legendre_phi.c real.c inverse_interpolate.c rootmod.c lucas_seq.c prime_powers.c sort.c -lm
1989			*/
1990			#ifdef FACTOR_STANDALONE
1991			#include
1992			int main(int argc, char *argv[])
1993			{
1994			UV n;
1995			UV factors[MPU_MAX_FACTORS+1];
1996			int nfactors, i, a;
1997
1998			if (argc <= 1) {
1999			char line[1024];
2000			while (1) {
2001			if (!fgets(line,sizeof(line),stdin)) break;
2002			n = strtoull(line, 0, 10);
2003			nfactors = factor(n, factors);
2004			if (nfactors == 1) {
2005			printf("%"UVuf": %"UVuf"\n",n,n);
2006			} else if (nfactors == 2) {
2007			printf("%"UVuf": %"UVuf" %"UVuf"\n",n,factors[0],factors[1]);
2008			} else if (nfactors == 3) {
2009			printf("%"UVuf": %"UVuf" %"UVuf" %"UVuf"\n",n,factors[0],factors[1],factors[2]);
2010			} else {
2011			printf("%"UVuf": %"UVuf" %"UVuf" %"UVuf" %"UVuf"",n,factors[0],factors[1],factors[2],factors[3]);
2012			for (i = 4; i < nfactors; i++) printf(" %"UVuf"", factors[i]);
2013			printf("\n");
2014			}
2015			}
2016			exit(0);
2017			}
2018
2019			for (a = 1; a < argc; a++) {
2020			n = strtoul(argv[a], 0, 10);
2021			if (n == ULONG_MAX && errno == ERANGE) { printf("Argument larger than ULONG_MAX\n"); return(-1); }
2022			nfactors = factor(n, factors);
2023			printf("%"UVuf":", n);
2024			for (i = 0; i < nfactors; i++)
2025			printf(" %"UVuf"", factors[i]);
2026			printf("\n");
2027			}
2028
2029			return(0);
2030			}
2031			#endif