File Coverage

totients.c

Criterion	Covered	Total	%
statement	240	297	80.8
branch	185	252	73.4
condition			n/a
subroutine			n/a
pod			n/a
total	425	549	77.4

line	stmt	bran	code
1			#include
2			#include
3			#include
4
5			#define FUNC_isqrt 1
6			#define FUNC_ipow 1
7			#include "ptypes.h"
8			#include "sort.h"
9			#include "totients.h"
10			#include "sieve.h"
11			#include "util.h"
12			#include "factor.h"
13			#include "keyval.h"
14
15
16	684		static UV _totient_fac(uint32_t nfacs, UV* facs) {
17			uint32_t i;
18	684		UV totient = 1, lastf = 0;
19			/* n=0 is factored as (0) so it correctly returns 0. */
20	1953	100	for (i = 0; i < nfacs; i++) {
21	1269		UV f = facs[i];
22	1269	100	if (f == lastf) { totient *= f; }
23	974		else { totient *= f-1; lastf = f; }
24			}
25	684		return totient;
26			}
27
28
29	644		UV totient(UV n) {
30			#if 1
31			UV factors[MPU_MAX_FACTORS+1];
32	644		uint32_t nfactors = factor(n, factors);
33	644		return _totient_fac(nfactors, factors);
34			#else
35			factored_t nf;
36			UV totient;
37			uint32_t i;
38
39			if (n <= 0) return n;
40			nf = factorint(n);
41			for (i = 0, totient = 1; i < nf.nfactors; i++) {
42			UV f = nf.f[i];
43			unsigned e = nf.e[i];
44			totient *= f-1;
45			while (e-- > 1)
46			totient *= f;
47			}
48			return totient;
49			#endif
50			}
51
52
53	78		UV* range_totient(UV lo, UV hi) {
54	78		UV i, count = hi-lo+1, *totients;
55
56	78	50	if (hi < lo \|\| count == 0 \|\| count > (Size_t)((SSize_t)-1))
		50
57	0		croak("range_totient error hi %"UVuf" < lo %"UVuf"\n", hi, lo);
58
59	78	100	if (hi < 16) {
60			static const uint8_t small_totients[] = {0,1,1,2,2,4,2,6,4,6,4,10,4,12,6,8};
61	67	50	New(0, totients, count, UV);
62	316	100	for (i = 0; i < count; i++)
63	249		totients[i] = small_totients[lo+i];
64	67		return totients;
65			}
66
67	11	100	if (lo > 0) { /* With a non-zero start, use our ranged factoring */
68
69			factor_range_context_t fctx;
70	6		fctx = factor_range_init(lo, hi, 0);
71	6	50	New(0, totients, count, UV);
72	46	100	for (i = 0; i < count; i++) {
73	40		uint32_t nfactors = factor_range_next(&fctx);
74	40		totients[i] = _totient_fac(nfactors, fctx.factors);
75			}
76	6		factor_range_destroy(&fctx);
77
78			} else { /* start at zero */
79
80			uint32_t j, *prime;
81	5		uint32_t sqrthi = isqrt(hi);
82	5		uint32_t nprimes = 0;
83
84	5	50	if (hi == UV_MAX)
85	0		croak("range_totient error hi %"UVuf" < lo %"UVuf"\n", hi, lo);
86
87			/* prime[] will hold primes from 3 to sqrthi */
88	5	50	New(0, prime, max_nprimes(sqrthi), uint32_t);
89	5	50	Newz(0, totients, count, UV);
90	5		totients[1] = 1;
91
92	2068	100	for (i = 1; i <= hi/2; i += 2) {
93	2063		UV toti = totients[i];
94	2063	100	if (toti == 0) {
95	612		totients[i] = toti = i-1;
96	612	100	if (i <= sqrthi)
97	38		prime[nprimes++] = i;
98			}
99	4409	100	for (j = 0; j < nprimes; j++) {
100	4400		UV index = i*prime[j];
101	4400	100	if (index > hi) break;
102	3010	100	if (i % prime[j] == 0) {
103	664		totients[index] = toti * prime[j];
104	664		break;
105			}
106	2346		totients[index] = toti * (prime[j] - 1);
107			}
108			/* Fill in even values as we go */
109	2063		totients[i*2] = toti;
110	2063	100	if (i+1 <= hi/2)
111	2062		totients[2i+2] = totients[i+1] 2;
112			}
113	5		Safefree(prime);
114
115			/* All totient values have been filled in except the primes. Mark them. */
116	2069	100	for (; i <= hi; i += 2)
117	2064	100	if (totients[i] == 0)
118	500		totients[i] = i-1;
119
120			}
121
122	11		return totients;
123			}
124
125
126
127			/******************************************************************************/
128
129
130			#define HAVE_SUMTOTIENT_128 (BITS_PER_WORD == 64 && HAVE_UINT128)
131			#if BITS_PER_WORD == 64
132			# define MAX_TOTSUM UVCONST(7790208950)
133			#else
134			# define MAX_TOTSUM 118868
135			#endif
136			/* sumtotient(7790208950) = 2^64 - 1664739356 */
137			/* sumtotient(7790208951) = 2^64 + 2584983748 */
138
139
140			/* Direct method: split the computation into two loops running over sqrtn.
141			*
142			* Page 7 of https://www.mimuw.edu.pl/~pan/papers/farey-esa.pdf
143			* https://math.stackexchange.com/a/1740370/117584
144			*/
145
146	62		static UV _sumtotient_direct(UV n) {
147			UV finalsum, *sumcache2;
148			uint32_t sqrtn, sum, i, j, k, *sumcache1;
149			bool flag;
150
151	62	50	if (n <= 2) return n;
152	62	50	if (n > MAX_TOTSUM) return 0;
153
154	62		sqrtn = isqrt(n);
155	62		flag = (n < (UV)sqrtn * ((UV)sqrtn+1)); /* Does n/r == r ? */
156
157	62		sumcache2 = range_totient(0, sqrtn);
158
159	62		New(0, sumcache1, sqrtn+1, uint32_t);
160	173	100	for (sum = 1, i = 2; i <= sqrtn; i++) {
161	111		sum += sumcache2[i];
162	111		sumcache1[i] = sum;
163			}
164	62	100	if (flag) sumcache2[sqrtn] = sumcache1[sqrtn];
165
166	203	100	for (i = sqrtn - flag; i > 0; i--) {
167	141		const UV m = n/i;
168	141		const uint32_t s = isqrt(m);
169	141		UV sum = (m+1)/2 * (m\|1); /* m(m+1)/2; /
170	141		sum -= (m - m/2); /* k=1 */
171
172	292	100	for (k = 2, j = k*i; j <= sqrtn; k++) {
173	151		sum -= sumcache2[j];
174	151		sum -= (m/k - m/(k+1)) * sumcache1[k];
175	151		j += i;
176			}
177	254	100	for (; k <= s; k++) {
178	113		sum -= sumcache1[m/k];
179	113		sum -= (m/k - m/(k+1)) * sumcache1[k];
180			}
181	141	100	if (m < (UV)s * ((UV)s+1))
182	75		sum += sumcache1[s];
183
184	141		sumcache2[i] = sum;
185			}
186	62		finalsum = sumcache2[1];
187	62		Safefree(sumcache1);
188	62		Safefree(sumcache2);
189	62		return finalsum;
190			}
191
192
193			/* Recursive method using a cache. */
194
195			typedef struct {
196			UV hsize;
197			UV nhash; / n value */
198			UV shash; / sum for n */
199			} sumt_hash_t;
200			#define _CACHED_SUMT(x) \
201			(((x)
202
203	378		static UV _sumt(UV n, const UV *cdata, UV csize, sumt_hash_t thash) {
204			UV sum, s, k, lim, hn;
205	378		uint32_t const probes = 8;
206			uint32_t hashk;
207
208	378	50	if (n < csize) return cdata[n];
209
210	378		hn = n % thash.hsize;
211	437	50	for (hashk = 0; hashk <= probes; hashk++) {
212	437	100	if (thash.nhash[hn] == n) return thash.shash[hn];
213	170	100	if (thash.nhash[hn] == 0) break;
214	59	100	hn = (hn+1 < thash.hsize) ? hn+1 : 0;
215			}
216
217	111		s = isqrt(n);
218	111		lim = n/(s+1);
219
220	111		sum = (n+1)/2 * (n\|1); /* (n(n+1))/2 /
221	111		sum -= (n - n/2);
222	14845	100	for (k = 2; k <= lim; k++) {
223	14734	100	sum -= _CACHED_SUMT(n/k);
224	14734	50	sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT(k);
225			}
226	111	100	if (s > lim)
227	51	50	sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT(s);
228
229	112	50	for (; hashk <= probes; hashk++) {
230	112	100	if (thash.nhash[hn] == 0) {
231	111		thash.nhash[hn] = n;
232	111		thash.shash[hn] = sum;
233	111		break;
234			}
235	1	50	hn = (hn+1 < thash.hsize) ? hn+1 : 0;
236			}
237	111		return sum;
238			}
239
240	81		UV sumtotient(UV n) {
241			UV sum, i, cbrtn, csize, *sumcache;
242			sumt_hash_t thash;
243
244	81	100	if (n <= 2) return n;
245	64	50	if (n > MAX_TOTSUM) return 0;
246
247	64	100	if (n < 4000) return _sumtotient_direct(n);
248
249	2		cbrtn = icbrt(n);
250	2		csize = cbrtn * cbrtn;
251
252	2		sumcache = range_totient(0, csize-1);
253	7923	100	for (i = 2; i < csize; i++)
254	7921		sumcache[i] += sumcache[i-1];
255
256	2		thash.hsize = next_prime(10 + 4*cbrtn);
257	2	50	Newz(0, thash.nhash, thash.hsize, UV);
258	2	50	New( 0, thash.shash, thash.hsize, UV);
259
260	2		sum = _sumt(n, sumcache, csize, thash);
261	2		Safefree(thash.nhash);
262	2		Safefree(thash.shash);
263	2		Safefree(sumcache);
264	2		return sum;
265			}
266
267
268
269			#if HAVE_SUMTOTIENT_128
270			#define _CACHED_SUMT128(x) \
271			(((x)
272			typedef struct {
273			UV hsize;
274			UV nhash; / n value */
275			uint128_t shash; / sum for n */
276			} sumt_hash_128_t;
277	0		static uint128_t _sumt128(UV n, const UV *cdata, UV csize, sumt_hash_128_t thash) {
278			uint128_t sum;
279			UV s, k, lim, hn;
280	0		uint32_t const probes = 16;
281	0		uint32_t const hinc = 1 + ((n >> 8) & 15); /* mitigate clustering */
282			uint32_t hashk;
283
284	0	0	if (n < csize) return cdata[n];
285
286	0		hn = n % thash.hsize;
287	0	0	for (hashk = 0; hashk <= probes; hashk++) {
288	0	0	if (thash.nhash[hn] == n) return thash.shash[hn];
289	0	0	if (thash.nhash[hn] == 0) break;
290	0	0	hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
291			}
292
293	0		s = isqrt(n);
294	0		lim = n/(s+1);
295
296	0		sum = ((uint128_t)n+1)/2 * (n\|1); /* (n(n+1))/2 /
297	0		sum -= (n - n/2);
298	0	0	for (k = 2; k <= lim; k++) {
299	0	0	sum -= _CACHED_SUMT128(n/k);
300	0	0	sum -= ((n/k) - (n/(k+1))) * _CACHED_SUMT128(k);
301			}
302	0	0	if (s > lim)
303	0	0	sum -= ((n/s) - (n/(s+1))) * _CACHED_SUMT128(s);
304
305	0	0	for (; hashk <= probes; hashk++) {
306	0	0	if (thash.nhash[hn] == 0) {
307	0		thash.nhash[hn] = n;
308	0		thash.shash[hn] = sum;
309	0		break;
310			}
311	0	0	hn = (hn+hinc < thash.hsize) ? hn+hinc : hn+hinc-thash.hsize;
312			}
313
314	0		return sum;
315			}
316
317	0		int sumtotient128(UV n, UV hi_sum, UV lo_sum) {
318			UV i, cbrtn, csize, hsize, *sumcache;
319			uint128_t sum;
320			sumt_hash_128_t thash;
321
322	0	0	if (n <= 2) { hi_sum = 0; lo_sum = n; return 1; }
323			/* sumtotient(2^64-1) < 2^128, so we can't overflow. */
324
325	0		cbrtn = icbrt(n);
326	0		csize = 0.6 * cbrtn * cbrtn;
327	0		hsize = 8 * cbrtn; /* 12.5% filled with csize = 1 * n^(2/3) */
328
329	0	0	if (csize > 400000000U) { /* Limit to 3GB */
330	0		csize = 400000000;
331	0		hsize = isqrt(n);
332			}
333
334	0		sumcache = range_totient(0, csize-1);
335	0	0	for (i = 2; i < csize; i++)
336	0		sumcache[i] += sumcache[i-1];
337
338			/* Arguably we should expand the hash as it fills. */
339	0		thash.hsize = next_prime( 16 + hsize );
340	0	0	Newz(0, thash.nhash, thash.hsize, UV);
341	0	0	New( 0, thash.shash, thash.hsize, uint128_t);
342
343	0		sum = _sumt128(n, sumcache, csize, thash);
344	0		*hi_sum = (sum >> 64) & UV_MAX;
345	0		*lo_sum = (sum ) & UV_MAX;
346
347	0	0	if (_XS_get_verbose() >= 2) {
348	0		UV filled = 0;
349	0	0	for (i = 0; i < thash.hsize; i++)
350	0		filled += (thash.nhash[i] != 0);
351	0		printf(" 128-bit totsum phi %6.1lfMB hash size %6.1lfMB, fill: %6.2lf%%\n", csizesizeof(UV)/1048576.0, thash.hsize3sizeof(UV)/1048576.0, 100.0 (double)filled / (double)thash.hsize);
352			}
353
354	0		Safefree(thash.nhash);
355	0		Safefree(thash.shash);
356	0		Safefree(sumcache);
357	0		return 1;
358			}
359			#else
360			int sumtotient128(UV n, UV hi_sum, UV lo_sum) {
361			return 0;
362			}
363			#endif
364
365
366			/******************************************************************************/
367			/******************************************************************************/
368
369
370			static const UV jordan_overflow[5] =
371			#if BITS_PER_WORD == 64
372			{UVCONST(4294967311), 2642249, 65537, 7133, 1627};
373			#else
374			{UVCONST( 65537), 1627, 257, 85, 41};
375			#endif
376	490		UV jordan_totient(UV k, UV n) {
377			factored_t nf;
378			uint32_t i;
379			UV totient;
380
381	490	50	if (k == 0 \|\| n <= 1) return (n == 1);
		100
382	479	100	if (k > 6 \|\| (k > 1 && n >= jordan_overflow[k-2])) return 0;
		100
		50
383
384	457		totient = 1;
385			/* Similar to Euler totient, shortcut even inputs */
386	662	100	while ((n & 0x3) == 0) { n >>= 1; totient *= (1<
387	457	100	if ((n & 0x1) == 0) { n >>= 1; totient *= ((1<
388
389	457		nf = factorint(n);
390	960	100	for (i = 0; i < nf.nfactors; i++) {
391	503		UV p = nf.f[i];
392	503		UV e = nf.e[i];
393	503		UV pk = ipow(p,k);
394	503		totient *= (pk-1);
395	580	100	while (e-- > 1)
396	77		totient *= pk;
397			}
398	457		return totient;
399			}
400
401
402			/******************************************************************************/
403
404
405	462		static bool _totpred(UV n, UV maxd) {
406			UV i, ndivisors, *divs;
407			bool res;
408
409	462	100	if (n & 1) return 0;
410	247	100	if ((n & (n-1)) == 0) return 1;
411	237		n >>= 1;
412	237	50	if (n == 1) return 1;
413	237	100	if (n < maxd && is_prime(2*n+1)) return 1;
		100
414
415	220		divs = divisor_list(n, &ndivisors, maxd);
416	1074	100	for (i = 0, res = 0; i < ndivisors && divs[i] < maxd && res == 0; i++) {
		100
		100
417	854		UV r, d = divs[i], p = 2*d+1;
418	854	100	if (!is_prime(p)) continue;
419	346		r = n/d;
420			while (1) {
421	400	100	if (r == p \|\| _totpred(r, d)) { res = 1; break; }
		100
422	387	100	if (r % p) break;
423	54		r /= p;
424			}
425			}
426	220		Safefree(divs);
427	220		return res;
428			}
429
430	125		bool is_totient(UV n) {
431	125	100	return (n == 0 \|\| (n & 1)) ? (n==1) : _totpred(n,n);
		100
432			}
433
434
435			/******************************************************************************/
436
437
438	104		UV inverse_totient_count(UV n) {
439			set_t set, sumset;
440			keyval_t keyval;
441			UV res, i, ndivisors, *divs;
442
443	104	100	if (n == 1) return 2;
444	103	100	if (n < 1 \|\| n & 1) return 0;
		100
445	53	100	if (is_prime(n >> 1)) { /* Coleman Remark 3.3 (Thm 3.1) and Prop 6.2 */
446	15	100	if (!is_prime(n+1)) return 0;
447	7	100	if (n >= 10) return 2;
448			}
449
450	40		divs = divisor_list(n, &ndivisors, n);
451
452	40		init_set(&set, 2*ndivisors);
453	40		keyval.key = 1; keyval.val = 1;
454	40		set_addsum(&set, keyval);
455
456	2231	100	for (i = 0; i < ndivisors; i++) {
457	2191		UV d = divs[i], p = d+1;
458	2191	100	if (is_prime(p)) {
459	734		UV j, np = d, v = valuation(n, p);
460	734		init_set(&sumset, ndivisors/2);
461	1621	100	for (j = 0; j <= v; j++) {
462	887		UV k, ndiv = n/np; /* Loop over divisors of n/np */
463	887	100	if (np == 1) {
464	40		keyval_t kv; kv.key = 1; kv.val = 1;
465	40		set_addsum(&sumset, kv);
466			} else {
467	467221	50	for (k = 0; k < ndivisors && divs[k] <= ndiv; k++) {
		100
468	466374		UV val, d2 = divs[k];
469	466374	100	if ((ndiv % d2) != 0) continue;
470	84137		val = set_getval(set, d2);
471	84137	100	if (val > 0) {
472	40356		keyval_t kv; kv.key = d2*np; kv.val = val;
473	40356		set_addsum(&sumset, kv);
474			}
475			}
476			}
477			/* if (j < v && np > UV_MAX/p) croak("overflow np d %lu", d); */
478	887		np *= p;
479			}
480	734		set_merge(&set, sumset);
481	734		free_set(&sumset);
482			}
483			}
484	40		Safefree(divs);
485	40		res = set_getval(set, n);
486	40		free_set(&set);
487	40		return res;
488			}
489
490	10		UV* inverse_totient_list(UV *ntotients, UV n) {
491			set_list_t setlist, divlist;
492			UV i, ndivisors, divs, tlist;
493	10		UV *totlist = 0;
494
495	10	100	if (n == 1) {
496	1		New(0, totlist, 2, UV);
497	1		totlist[0] = 1; totlist[1] = 2;
498	1		*ntotients = 2;
499	1		return totlist;
500			}
501	9	100	if (n < 1 \|\| n & 1) {
		100
502	2		*ntotients = 0;
503	2		return totlist;
504			}
505	7	100	if (is_prime(n >> 1)) { /* Coleman Remark 3.3 (Thm 3.1) and Prop 6.2 */
506	3	100	if (!is_prime(n+1)) {
507	1		*ntotients = 0;
508	1		return totlist;
509	2	50	} else if (n >= UV_MAX/2) { /* overflow */
510	0		*ntotients = UV_MAX;
511	0		return totlist;
512	2	100	} else if (n >= 10) {
513	1		New(0, totlist, 2, UV);
514	1		totlist[0] = n+1; totlist[1] = 2*n+2;
515	1		*ntotients = 2;
516	1		return totlist;
517			}
518			}
519
520			/* Check for possible overflow in the inner loop.
521			* Smallest 32-bit overflow is at 716636160 with 272 divisors.
522			* 1145325184 with <= 16 divisors
523			* 64-bit overflow: 2459565884898017280 < n <= 2772864768682229760.
524			*/
525	5	50	if (n >= (BITS_PER_WORD == 64 ? UVCONST(2459565884898017280) : 716636160UL)) {
526	0		*ntotients = UV_MAX;
527	0		return totlist;
528			}
529
530	5		divs = divisor_list(n, &ndivisors, n);
531
532	5		init_setlist(&setlist, 2*ndivisors);
533	5		setlist_addval(&setlist, 1, 1); /* Add 1 => [1] */
534
535	107	100	for (i = 0; i < ndivisors; i++) {
536	102		UV d = divs[ndivisors - i - 1], p = d+1; /* Divisors in reverse order */
537	102	100	if (is_prime(p)) {
538	37		UV j, dp = d, pp = p, v = valuation(n, p);
539	37		init_setlist(&divlist, ndivisors/2);
540	95	100	for (j = 0; j <= v; j++) {
541	58		UV k, ndiv = n/dp; /* Loop over divisors of n/dp */
542	1288	100	for (k = 0; k < ndivisors && divs[k] <= ndiv; k++) {
		100
543	1230		UV nvals, *vals, d2 = divs[k];
544	1476	100	if ((ndiv % d2) != 0) continue;
545			/* For the last divisor [1], don't add intermediate values */
546	680	100	if (d == 1 && d2*dp != n) continue;
		100
547	434		vals = setlist_getlist(&nvals, setlist, d2);
548	434	100	if (vals != 0)
549	136		setlist_addlist(&divlist, d2 * dp, nvals, vals, pp);
550			}
551	58		dp *= p;
552	58		pp *= p;
553			}
554	37		setlist_merge(&setlist, divlist);
555	37		free_setlist(&divlist);
556			}
557			}
558	5		Safefree(divs);
559
560	5		tlist = setlist_getlist(ntotients, setlist, n);
561	5	50	if (tlist != 0 && *ntotients > 0) {
		50
562	5	50	New(0, totlist, *ntotients, UV);
563	5		memcpy(totlist, tlist, ntotients sizeof(UV));
564	5		sort_uv_array(totlist, *ntotients);
565			}
566	5		free_setlist(&setlist);
567	5		return totlist;
568			}