File Coverage

legendre_phi.c

Criterion	Covered	Total	%
statement	162	343	47.2
branch	111	330	33.6
condition			n/a
subroutine			n/a
pod			n/a
total	273	673	40.5

line	stmt	bran	code
1			#include
2			#include
3			#include
4
5			#define FUNC_isqrt 1
6			#include "sieve.h"
7			#include "util.h"
8			#include "prime_counts.h"
9			#include "prime_count_cache.h"
10			#include "legendre_phi.h"
11
12
13			/*
14			* tablephi / tiny_phi
15			* a must be very small (e.g. 6, 7)
16			* direct answer
17			*
18			* phi_small
19			* a must be very small (e.g. 15)
20			* calls tablephi
21			* simple iteration using fixed size lists
22			*
23			* phi_recurse_small
24			* memoryless recursive
25			* calls phi_small, nth_prime (if a > 25), prev_prime, next_prime
26			* good for very small a (e.g. less than 25)
27			*
28			* phi_recurse
29			* recursive with a cache
30			* calls tablephi, prime_count_cache, phi_recurse internal
31			* generates primes to max(nth_prime(a),isqrt(x))
32			*
33			* phi_walk
34			* iterative using list merges
35			* calls tablephi, prime_count_cache, phi_recurse internal
36			* generates primes to max(nth_prime(a),isqrt(x))
37			* complicated, can be much faster than the others, but uses a lot of memory
38			*
39			* legendre_phi
40			* decides what to do, including handling some special cases
41			*/
42
43			/============================================================================/
44
45			#define FAST_DIV(x,y) \
46			( ((x) <= 4294967295U) ? (uint32_t)(x)/(uint32_t)(y) : (x)/(y) )
47
48			#define PHIC 6U /* phi(x,a) with a <= PHIC can go to tablephi */
49
50			#define PHIS 15U /* phi(x,a) with a <= PHIS can go to phi_small */
51			#define PHIS_XMIN (_snth[PHIS+1]-1U) /* nth_prime(PHIS+1)-1 */
52
53			#define PHIR 20U /* phi(x,a) with a <= PHIR is faster with phi_recurse_small */
54
55			/============================================================================/
56
57			/* For x >= 1 and a >= 4, phi(x,a) = phi(x-_pred7[x%210],a)
58			* This allows us to collapse multiple x values, useful for caching. */
59			static const unsigned char _pred7[210] = {1,0,1,2,3,4,5,6,7,8,9,0,1,0,1,2,3,0,1,0,1,2,3,0,1,2,3,4,5,0,1,0,1,2,3,4,5,0,1,2,3,0,1,0,1,2,3,0,1,2,3,4,5,0,1,2,3,4,5,0,1,0,1,2,3,4,5,0,1,2,3,0,1,0,1,2,3,4,5,0,1,2,3,0,1,2,3,4,5,0,1,2,3,4,5,6,7,0,1,2,3,0,1,0,1,2,3,0,1,0,1,2,3,0,1,2,3,4,5,6,7,0,1,2,3,4,5,0,1,2,3,0,1,2,3,4,5,0,1,0,1,2,3,0,1,2,3,4,5,0,1,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,0,1,0,1,2,3,0,1,2,3,4,5,0,1,0,1,2,3,4,5,0,1,2,3,0,1,0,1,2,3,0,1,0,1,2,3,4,5,6,7,8,9,0};
60
61			/* Maps x to value <= x not divisible by first 4 primes */
62			/* mpu 'say join(",",map { legendre_phi($_,4)-1 } 0..209);' */
63			static const int8_t _coprime_idx210[210]={-1,0,0,0,0,0,0,0,0,0,0,1,1,2,2,2,2,3,3,4,4,4,4,5,5,5,5,5,5,6,6,7,7,7,7,7,7,8,8,8,8,9,9,10,10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,14,14,14,14,14,14,15,15,15,15,16,16,17,17,17,17,17,17,18,18,18,18,19,19,19,19,19,19,20,20,20,20,20,20,20,20,21,21,21,21,22,22,23,23,23,23,24,24,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,28,28,28,28,29,29,29,29,29,29,30,30,31,31,31,31,32,32,32,32,32,32,33,33,34,34,34,34,34,34,35,35,35,35,35,35,36,36,36,36,37,37,38,38,38,38,39,39,39,39,39,39,40,40,41,41,41,41,41,41,42,42,42,42,43,43,44,44,44,44,45,45,46,46,46,46,46,46,46,46,46,46,47};
64	7911		static UV _toindex210(UV x) {
65	7911		UV q = x / 210, r = x % 210;
66	7911		return 48 * q + _coprime_idx210[r];
67			}
68
69			/* Small table of nth primes */
70			static const unsigned char _snth[25+1] = {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97};
71
72			/============================================================================/
73
74			/* static const uint8_t _s0[ 1] = {0};
75			static const uint8_t _s1[ 2] = {0,1};
76			static const uint8_t _s2[ 6] = {0,1,1,1,1,2}; */
77			static const uint8_t _s3[30] = {0,1,1,1,1,1,1,2,2,2,2,3,3,4,4,4,4,5,5,6,6,6,6,7,7,7,7,7,7,8};
78			static const uint8_t _s4[210]= {0,1,1,1,1,1,1,1,1,1,1,2,2,3,3,3,3,4,4,5,5,5,5,6,6,6,6,6,6,7,7,8,8,8,8,8,8,9,9,9,9,10,10,11,11,11,11,12,12,12,12,12,12,13,13,13,13,13,13,14,14,15,15,15,15,15,15,16,16,16,16,17,17,18,18,18,18,18,18,19,19,19,19,20,20,20,20,20,20,21,21,21,21,21,21,21,21,22,22,22,22,23,23,24,24,24,24,25,25,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,29,29,29,29,30,30,30,30,30,30,31,31,32,32,32,32,33,33,33,33,33,33,34,34,35,35,35,35,35,35,36,36,36,36,36,36,37,37,37,37,38,38,39,39,39,39,40,40,40,40,40,40,41,41,42,42,42,42,42,42,43,43,43,43,44,44,45,45,45,45,46,46,47,47,47,47,47,47,47,47,47,47,48};
79	392195		static UV tablephi(UV x, uint32_t a)
80			{
81	392195		switch (a) {
82	0		case 0: return x;
83	0		case 1: return x-x/2;
84	0		case 2: return x-x/2-x/3+x/6;
85	0		case 3: return (x/ 30U) * 8U + _s3[x % 30U];
86	6		case 4: return (x/ 210U) * 48U + _s4[x % 210U];
87	2		case 5: {
88	2		UV xp = x / 11U;
89	2		return ((x /210) * 48 + _s4[x % 210]) -
90	2		((xp/210) * 48 + _s4[xp % 210]);
91			}
92	392187		case 6:
93			#if PHIC == 6
94			default:
95			#endif
96			{
97	392187		UV xp = x / 11U;
98	392187		UV x2 = x / 13U;
99	392187		UV x2p = x2 / 11U;
100	392187		return ((x /210) * 48 + _s4[x % 210]) -
101	392187		((xp /210) * 48 + _s4[xp % 210]) -
102	392187		((x2 /210) * 48 + _s4[x2 % 210]) +
103	392187		((x2p/210) * 48 + _s4[x2p% 210]);
104			}
105			#if PHIC == 7
106			case 7:
107			default:return tablephi(x,a-1) - tablephi(x/17,a-1); /* Hacky */
108			#endif
109			}
110			}
111			/============================================================================/
112
113			/* Iterate with simple arrays, no merging or cleverness. */
114
115	3		static UV phi_small(UV x, uint32_t a) {
116	3		UV sum = 0, xpos[1025], xneg[1025]; /* For 32-bit x, 848 is enough */
117			uint32_t i, npos, nneg;
118
119	3	50	if (a < 4) {
120	0	0	return (a==0) ? x :
121	0	0	(a==1) ? x-x/2 :
122	0		(a==2) ? x-x/2-x/3+x/6
123	0	0	: (x/30U) * 8U + _s3[x % 30U];
124			}
125	3	50	MPUassert(a <= PHIS, "phi_small: a too large");
126	3	50	if (x < _snth[a+1]) return (x>0);
127
128	13	100	for (npos = nneg = 0, xpos[npos++] = x; a > 4U; a--) {
129	10		uint32_t oneg = nneg, opos = npos;
130	26	100	for (i = 0; i < opos; i++)
131	16	100	if (xpos[i] >= _snth[a])
132	15		xneg[nneg++] = xpos[i]/_snth[a];
133	23	100	for (i = 0; i < oneg; i++)
134	13	100	if (xneg[i] >= _snth[a])
135	12		xpos[npos++] = xneg[i]/_snth[a];
136			}
137	18	100	for (i = 0; i < npos; i++)
138	15		sum += (xpos[i]/210U)*48U + _s4[xpos[i] % 210U];
139	18	100	for (i = 0; i < nneg; i++)
140	15		sum -= (xneg[i]/210U)*48U + _s4[xneg[i] % 210U];
141	3		return sum;
142			}
143
144			/============================================================================/
145
146			/* Recurse until a <= PHIS */
147
148	0		static UV phi_recurse_small(UV x, UV a) {
149			UV sum, i, xp, p, npa;
150
151	0	0	if (x < 1 \|\| a >= x) return (x > 0);
		0
152	0	0	if (a <= PHIS \|\| x <= PHIS_XMIN) return phi_small(x, a);
		0
153
154	0	0	npa = (a <= 25) ? _snth[a] : nth_prime(a);
155	0		sum = phi_small(x, PHIS);
156	0		p = _snth[PHIS];
157	0	0	for (i = PHIS+1; i <= a; i++) {
158	0		p = next_prime(p);
159	0	0	xp = FAST_DIV(x,p);
160	0	0	if (xp < p) {
161	0	0	while (x < npa) {
162	0		a--;
163	0		npa = prev_prime(npa);
164			}
165	0		return (sum - a + i - 1);
166			}
167	0		sum -= phi_recurse_small(xp, i-1);
168			}
169	0		return sum;
170			}
171
172			/============================================================================/
173			/============================================================================/
174
175			/* Cache for phi(x,a) */
176
177			#define PHICACHEA 512
178			typedef struct
179			{
180			uint32_t siz[PHICACHEA]; /* how many entries we have allocated */
181			uint16_t *val[PHICACHEA];
182			uint32_t xlim;
183			} phi_cache_t;
184
185	5		static phi_cache_t* phi_cache_create(uint32_t xlim) {
186			phi_cache_t *cache;
187			int a;
188	5		New(0, cache, 1, phi_cache_t);
189	2565	100	for (a = 0; a < PHICACHEA; a++) {
190	2560		cache->val[a] = 0;
191	2560		cache->siz[a] = 0;
192			}
193	5	50	cache->xlim = (xlim < 0xFFFFFFFFU) ? xlim : xlim-1; /* Reserve 0xFFFFFFFF */
194	5		return cache;
195			}
196
197	5		static void phi_cache_destroy(phi_cache_t* cache) {
198			int a;
199	2565	100	for (a = 0; a < PHICACHEA; a++) {
200	2560	100	if (cache->val[a] != 0)
201	41		Safefree(cache->val[a]);
202			}
203	5		Safefree(cache);
204	5		}
205
206	2487		static void phi_cache_insert(uint32_t x, uint32_t a, IV sum, phi_cache_t* cache) {
207			uint32_t i, newsize;
208	2487	100	if (sum < 0) sum = -sum;
209	2487	50	if (sum > 65535) return; /* If sum is too large for the cache, ignore it. */
210	2487	100	if (x >= cache->siz[a]) {
211	45		newsize = (x >= 0xFFFFFFFFUL-32) ? 0xFFFFFFFFUL-1 : x+32;
212	45	100	if (cache->val[a] == 0) {
213	41		Newz(0, cache->val[a], newsize, uint16_t);
214			} else {
215	4		Renew(cache->val[a], newsize, uint16_t);
216	887	100	for (i = cache->siz[a]; i < newsize; i++) /* Zero the new entries */
217	883		cache->val[a][i] = 0;
218			}
219	45		cache->siz[a] = newsize;
220			}
221	2487		cache->val[a][x] = (uint16_t) sum;
222			}
223
224			/* End of Phi cache definitions */
225
226			/* Struct of everything needed for recursive phi call */
227
228			typedef struct {
229			const uint32_t* primes;
230			uint32_t lastidx;
231			void* cachepc;
232			phi_cache_t* cachephi;
233			} phidata_t;
234
235	5		static phidata_t* phidata_create(const uint32_t* primes, uint32_t lastidx, UV x, UV a)
236			{
237			phidata_t *d;
238	5		uint32_t xlim = (UV) pow(x, 1.0/2.70);
239	5	100	if (xlim < 256) xlim = 256;
240
241			(void)a; /* Currently unused */
242	5		New(0, d, 1, phidata_t);
243	5		d->primes = primes;
244	5		d->lastidx = lastidx;
245	5		d->cachepc = prime_count_cache_create_with_primes(primes, lastidx);
246	5		d->cachephi = phi_cache_create(xlim);
247	5		return d;
248			}
249	5		static void phidata_destroy(phidata_t *d)
250			{
251	5		phi_cache_destroy(d->cachephi);
252	5		prime_count_cache_destroy(d->cachepc);
253			/* They own the primes */
254	5		Safefree(d);
255	5		}
256
257			#define PHI_IS_X_SMALL(x, a) \
258			( ((x) <= primes[d->lastidx]) && ((x) < (UV)primes[a+1] * primes[a+1]) )
259			#define PHI_PRIMECOUNT(x) \
260			prime_count_cache_lookup(d->cachepc, (x))
261
262			/* The recursive cached phi routine, given the struct with primes and cache */
263
264	12666		static IV _phi3(UV x, UV a, int sign, phidata_t *d)
265			{
266	12666		const uint32_t* const primes = d->primes;
267	12666		phi_cache_t* pcache = d->cachephi;
268			UV mapx;
269
270	12666	100	if (x < primes[a+1])
271	1736		return sign;
272	10930	100	else if (a <= PHIC)
273	142		return sign * tablephi(x,a);
274	10788	100	else if (PHI_IS_X_SMALL(x,a))
		100
275	2366		return sign * (PHI_PRIMECOUNT(x) - a + 1);
276
277			/* Choose a mapping: x, (x+1)>>1, _toindex30(x), _toindex210(x) */
278	8422	100	mapx = (a < PHICACHEA) ? _toindex210(x) : 0;
279
280	8422	100	if (a < PHICACHEA && mapx < pcache->siz[a]) {
		100
281	4408		IV v = pcache->val[a][mapx];
282	4408	100	if (v != 0)
283	1966		return sign * v;
284			}
285			{
286	6456	100	UV xp, i, iters = ((UV)a*a > x) ? PHI_PRIMECOUNT(isqrt(x)) : a;
287	6456		UV c = (iters > PHIC) ? PHIC : iters;
288	6456		IV sum = sign * (iters - a + tablephi(x,c));
289
290			/* for (i=c; i
291
292	6456	50	if (c < iters)
293	6456	50	sum += -sign * tablephi(FAST_DIV(x,primes[c+1]), c);
294	14002	100	for (i = c+1; i < iters; i++) {
295	12251	50	xp = FAST_DIV(x,primes[i+1]);
296	12251	100	if (PHI_IS_X_SMALL(xp,i))
		100
297	4705		break;
298	7546		sum += _phi3(xp, i, -sign, d);
299			}
300	50723	100	for (; i < iters; i++) {
301	44441	50	xp = FAST_DIV(x,primes[i+1]);
302	44441	100	if (xp < primes[i+1])
303	174		break;
304	44267		sum += -sign * (PHI_PRIMECOUNT(xp) - i + 1);
305			}
306	6456	100	if (i < iters)
307	174		sum += -sign * (iters - i);
308
309	6456	100	if (a < PHICACHEA && mapx <= pcache->xlim)
		100
310	2487		phi_cache_insert(mapx, a, sum, pcache);
311	6456		return sum;
312			}
313			}
314
315	4		static UV phi_recurse(UV x, UV a)
316			{
317			uint32_t* primes;
318			uint32_t lastidx;
319	4		UV primes_to_n, sum = 1;
320
321	4	50	if (x < 1 \|\| a >= x) return (x > 0);
		50
322	4	50	if (a <= PHIS \|\| x <= PHIS_XMIN) return phi_small(x, a);
		50
323	4	50	if (a > 203280221) croak("64-bit phi out of range");
324
325	4		primes_to_n = nth_prime_upper(a);
326	4	50	if (isqrt(x) > primes_to_n) primes_to_n = isqrt(x);
327	4		lastidx = range_prime_sieve_32(&primes, primes_to_n, 1);
328
329	4	50	if (primes[a] < x) {
330	4		phidata_t *d = phidata_create(primes, lastidx, x, a);
331			/* Ensure testing with legendre_phi(1e13, 203280221) +/- 2 */
332			/* sum = (UV) _phi3(x, a, 1, d); */
333	4		sum = (UV) _phi3(x, a-1, 1, d) - (UV) _phi3(x/primes[a], a-1, 1, d);
334	4		phidata_destroy(d);
335			}
336
337	4		Safefree(primes);
338	4		return sum;
339			}
340
341			/============================================================================/
342			/============================================================================/
343
344			static int const verbose = 0;
345			#define MAX_PHI_MEM (89610241024)
346			#define NTHRESH (MAX_PHI_MEM/16)
347
348			/******************************************************************************/
349			/* In-order lists for manipulating our UV value / IV count pairs */
350			/******************************************************************************/
351
352			typedef struct {
353			UV v;
354			IV c;
355			} vc_t;
356
357			typedef struct {
358			vc_t* a;
359			UV size;
360			UV n;
361			} vcarray_t;
362
363	0		static vcarray_t vcarray_create(void)
364			{
365			vcarray_t l;
366	0		l.a = 0;
367	0		l.size = 0;
368	0		l.n = 0;
369	0		return l;
370			}
371	0		static void vcarray_destroy(vcarray_t* l)
372			{
373	0	0	if (l->a != 0) {
374	0	0	if (verbose > 2) printf("FREE list %p\n", l->a);
375	0		Safefree(l->a);
376			}
377	0		l->size = 0;
378	0		l->n = 0;
379	0		}
380
381			/* Insert a value/count pair. Must be done in decreasing size order. */
382	0		static void vcarray_insert(vcarray_t* l, UV val, IV count)
383			{
384	0		UV n = l->n;
385	0		vc_t* arr = l->a;
386
387	0	0	if (n > 0 && arr[n-1].v <= val) {
		0
388	0	0	if (arr[n-1].v == val) {
389	0		arr[n-1].c += count;
390	0		return;
391			}
392	0		croak("Previous value was %lu, inserting %lu out of order\n", arr[n-1].v, val);
393			}
394	0	0	if (n >= l->size) {
395			UV new_size;
396	0	0	if (l->size == 0) {
397	0		new_size = 20000;
398	0	0	if (verbose>2) printf("ALLOCing list, size %lu (%luk)\n", new_size, new_size*sizeof(vc_t)/1024);
399	0	0	New(0, l->a, new_size, vc_t);
400			} else {
401	0		new_size = (UV) (1.5 * l->size);
402	0	0	if (verbose>2) printf("REALLOCing list %p, new size %lu (%luk)\n",l->a,new_size, new_size*sizeof(vc_t)/1024);
403	0	0	Renew( l->a, new_size, vc_t );
404			}
405	0		l->size = new_size;
406	0		arr = l->a;
407			}
408	0		arr[n].v = val;
409	0		arr[n].c = count;
410	0		l->n++;
411			}
412
413			/* Merge the two sorted lists A and B into A. Each list has no duplicates,
414			* but they may have duplications between the two. We're quite interested
415			* in saving memory, so first remove all the duplicates, then do an in-place
416			* merge. */
417	0		static void vcarray_merge(vcarray_t* a, vcarray_t* b)
418			{
419			long ai, bi, bj, k, kn;
420	0		long an = a->n;
421	0		long bn = b->n;
422	0		vc_t* aa = a->a;
423	0		vc_t* ba = b->a;
424
425			/* Merge anything in B that appears in A. */
426	0	0	for (ai = 0, bi = 0, bj = 0; bi < bn; bi++) {
427	0		UV bval = ba[bi].v;
428			/* Skip forward in A until empty or aa[ai].v <= ba[bi].v */
429	0	0	while (ai+8 < an && aa[ai+8].v > bval) ai += 8;
		0
430	0	0	while (ai < an && aa[ai ].v > bval) ai++;
		0
431			/* if A empty then copy the remaining elements */
432	0	0	if (ai >= an) {
433	0	0	if (bi == bj)
434	0		bj = bn;
435			else
436	0	0	while (bi < bn)
437	0		ba[bj++] = ba[bi++];
438	0		break;
439			}
440	0	0	if (aa[ai].v == bval)
441	0		aa[ai].c += ba[bi].c;
442			else
443	0		ba[bj++] = ba[bi];
444			}
445	0	0	if (verbose>3) printf(" removed %lu duplicates from b\n", bn - bj);
446	0		bn = bj;
447
448	0	0	if (bn == 0) { /* In case they were all duplicates */
449	0		b->n = 0;
450	0		return;
451			}
452
453			/* kn = the final merged size. All duplicates are gone, so this is exact. */
454	0		kn = an+bn;
455	0	0	if ((long)a->size < kn) { /* Make A big enough to hold kn elements */
456	0		UV new_size = (UV) (1.2 * kn);
457	0	0	if (verbose>2) printf("REALLOCing list %p, new size %lu (%luk)\n", a->a, new_size, new_size*sizeof(vc_t)/1024);
458	0	0	Renew( a->a, new_size, vc_t );
459	0		aa = a->a; /* this could have been changed by the realloc */
460	0		a->size = new_size;
461			}
462
463			/* merge A and B. Very simple using reverse merge. */
464	0		ai = an-1;
465	0		bi = bn-1;
466	0	0	for (k = kn-1; k >= 0 && bi >= 0; k--) {
		0
467	0		UV bval = ba[bi].v;
468	0		long startai = ai;
469	0	0	while (ai >= 15 && aa[ai-15].v < bval) ai -= 16;
		0
470	0	0	while (ai >= 3 && aa[ai- 3].v < bval) ai -= 4;
		0
471	0	0	while (ai >= 0 && aa[ai ].v < bval) ai--;
		0
472	0	0	if (startai > ai) {
473	0		k = k - (startai - ai) + 1;
474	0		memmove(aa+k, aa+ai+1, (startai-ai) * sizeof(vc_t));
475			} else {
476	0	0	if (ai >= 0 && aa[ai].v == bval) croak("deduplication error");
		0
477	0		aa[k] = ba[bi--];
478			}
479			}
480	0		a->n = kn; /* A now has this many items */
481	0		b->n = 0; /* B is marked empty */
482			}
483
484	0		static void vcarray_remove_zeros(vcarray_t* a)
485			{
486	0		long ai = 0;
487	0		long aj = 0;
488	0		long an = a->n;
489	0		vc_t* aa = a->a;
490
491	0	0	while (aj < an) {
492	0	0	if (aa[aj].c != 0) {
493	0	0	if (ai != aj)
494	0		aa[ai] = aa[aj];
495	0		ai++;
496			}
497	0		aj++;
498			}
499	0		a->n = ai;
500	0		}
501
502			/* phi(x,a) non-recursive, using list merging. Memory intensive. */
503
504	0		static UV phi_walk(UV x, UV a)
505			{
506			UV i, sval, lastidx, lastprime, primes_to_n;
507	0		UV sum = 0;
508			uint32_t* primes;
509			vcarray_t a1, a2;
510			vc_t* arr;
511			phidata_t *d;
512
513	0	0	if (x < 1 \|\| a >= x) return (x > 0);
		0
514	0	0	if (x <= PHIC \|\| a <= PHIC) return tablephi(x, (a > PHIC) ? PHIC : a);
		0
515	0	0	if (a > 203280221) croak("64-bit phi out of range");
516
517	0		primes_to_n = nth_prime_upper(a);
518	0	0	if (isqrt(x) > primes_to_n) primes_to_n = isqrt(x);
519
520	0		lastidx = range_prime_sieve_32(&primes, primes_to_n, 1);
521	0		lastprime = primes[lastidx];
522	0	0	if (x < lastprime) { Safefree(primes); return 1; }
523
524	0		d = phidata_create(primes, lastidx, x, a);
525
526	0		a1 = vcarray_create();
527	0		a2 = vcarray_create();
528	0		vcarray_insert(&a1, x, 1);
529
530	0	0	while (a > PHIC) {
531	0		UV primea = primes[a];
532	0		arr = a1.a;
533
534	0	0	for (i = 0; i < a1.n; i++) {
535	0	0	sval = FAST_DIV(arr[i].v, primea);
536	0		sval -= _pred7[sval % 210]; /* Reduce to lower value if possible */
537	0	0	if (sval < primea \|\| PHI_IS_X_SMALL(sval, a-1))
		0
		0
538			break;
539	0		vcarray_insert(&a2, sval, -arr[i].c);
540			}
541	0	0	for ( ; i < a1.n; i++) {
542	0	0	sval = FAST_DIV(arr[i].v, primea);
543	0	0	if (sval < primea)
544	0		break;
545	0		sum -= arr[i].c * (PHI_PRIMECOUNT(sval)-a+2);
546			}
547	0	0	for ( ; i < a1.n; i++)
548	0		sum -= arr[i].c;
549
550			/* Merge a1 and a2 into a1. a2 will be emptied. */
551	0		vcarray_merge(&a1, &a2);
552
553			/* If we've grown too large, use recursive phi to clip. */
554	0	0	if ( a1.n > NTHRESH ) {
555	0		arr = a1.a;
556	0	0	if (verbose > 0) printf("clipping small values at a=%lu a1.n=%lu \n", a, a1.n);
557	0	0	for (i = 0; i < a1.n-NTHRESH+NTHRESH/50; i++) {
558	0		UV j = a1.n - 1 - i;
559	0		IV count = arr[j].c;
560	0	0	if (count != 0) {
561	0		sum += count * _phi3( arr[j].v, a-1, 1, d );
562	0		arr[j].c = 0;
563			}
564			}
565			}
566
567	0		vcarray_remove_zeros(&a1);
568	0		a--;
569			}
570	0		phidata_destroy(d);
571	0		Safefree(primes);
572	0		vcarray_destroy(&a2);
573	0		arr = a1.a;
574	0	0	for (i = 0; i < a1.n; i++)
575	0		sum += arr[i].c * tablephi( arr[i].v, PHIC );
576	0		vcarray_destroy(&a1);
577	0		return (UV) sum;
578			}
579
580			/============================================================================/
581			/============================================================================/
582
583	3343		uint32_t tiny_phi_max_a(void) { return PHIC; }
584
585	379060		UV tiny_phi(UV n, uint32_t a) {
586	379060		return (a <= PHIC) ? tablephi(n, a)
587	758120	50	: (a <= PHIS) ? phi_small(n, a)
588	0	0	: phi_recurse_small(n, a);
589			}
590
591	0		uint32_t small_phi_max_a(void) { return PHIS; }
592
593	0		UV small_phi(UV n, uint32_t a) {
594	0	0	return (a <= PHIS) ? phi_small(n, a) : phi_recurse(n, a);
595			}
596
597			/============================================================================/
598			/============================================================================/
599
600	1		void* prepare_cached_legendre_phi(UV x, UV a)
601			{
602			uint32_t npa, lastidx, *primes;
603
604	1	50	if (a > 203280221) a = 203280221;
605	1		npa = nth_prime_upper(a);
606	1	50	if (npa < isqrt(x)) npa = isqrt(x);
607	1		lastidx = range_prime_sieve_32(&primes, npa, 1);
608	1		return (void*) phidata_create(primes, lastidx, x, a);
609			}
610	2626		UV cached_legendre_phi(void* cache, UV x, UV a)
611			{
612	2626		phidata_t d = (phidata_t) cache;
613
614	2626	50	if (x < 1 \|\| a >= x) return (x > 0);
		50
615	2626	50	if (x <= PHIC \|\| a <= PHIC) return tablephi(x, (a > PHIC) ? PHIC : a);
		100
616	2556	50	if (a > (x >> 1)) return 1;
617
618			/* Make the function work even if x,a outside of cached conditions */
619	2556	50	if (a > 203280221) { /* prime_count(2*32) /
620	0		UV pc = prime_count(x);
621	0	0	return (a >= pc) ? 1 : pc - a + 1;
622			}
623	2556	50	if (a > d->lastidx)
624	0		return legendre_phi(x, a);
625
626	2556		return (UV) _phi3(x, a-1, 1, d) - (UV) _phi3(x/d->primes[a], a-1, 1, d);
627			}
628
629	1		void destroy_cached_legendre_phi(void* cache)
630			{
631	1		phidata_t d = (phidata_t) cache;
632	1		Safefree(d->primes);
633	1		phidata_destroy(d);
634	1		}
635
636			/* static UV phi_stupid(UV x, UV a) {
637			if (a <= PHIC) return tablephi(x,a);
638			return phi_stupid(x, a-1) - phi_stupid(x/nth_prime(a), a-1);
639			} */
640
641			/============================================================================/
642			/============================================================================/
643
644	22		UV legendre_phi(UV x, UV a)
645			{
646	22		UV sqrtx = isqrt(x);
647
648			/* If 'x' is very small, give a quick answer with any 'a' */
649	22	100	if (x < 1 \|\| a >= x) return (x > 0);
		100
650	20	50	if (x <= PHIC \|\| a <= PHIC) return tablephi(x, (a > PHIC) ? PHIC : a);
		100
651
652			/* Very fast shortcuts for large values */
653	9	50	if (a > (x >> 1))
654	0		return 1;
655	9	100	if (a >= sqrtx \|\| a > 203280221) { /* 203280221 = prime_count(2^32) */
		50
656	1		UV pc = prime_count(x);
657	1	50	return (a >= pc) ? 1 : pc - a + 1;
658			}
659			/* After this: 7 <= a <= MIN(203280221, sqrtx) */
660
661			/* For very small a, calculate now. */
662	8	100	if (a <= PHIS) return phi_small(x, a);
663	5	50	if (a <= PHIR) return phi_recurse_small(x, a);
664
665			/* Better shortcuts, slightly more time */
666	5	50	if (prime_count_upper(x) <= a)
667	0		return 1;
668			/* Use 'a' instead of 'a+1' to ensure Legendre Pi doesn't call here */
669	5	100	if (prime_count_upper(sqrtx) < a) {
670	1		UV pc = prime_count(x);
671	1	50	return (a >= pc) ? 1 : pc - a + 1;
672			}
673			/* Because we used the fast bounds, there are still a few easy cases. */
674
675			/* The best crossover between recurse and walk is complicated */
676			/* TODO: More tuning of the crossovers, or just improve the algorithms. */
677
678	4	50	if (x < 1e10)
679	4		return phi_recurse(x, a);
680
681	0	0	if ( (x >= 1e10 && x < 1e11 && a < 2000) \|\|
		0
		0
682	0	0	(x >= 1e11 && x < 1e12 && a < 4000) \|\|
		0
		0
683	0	0	(x >= 1e12 && x < 1e13 && a < 10000) \|\|
		0
		0
684	0	0	(x >= 1e13 && x < 1e14 && a < 24000) \|\|
		0
		0
685	0	0	(x >= 1e14 && x < 1e15 && a < 80000) \|\|
		0
		0
686	0	0	(x > 1e15 && a < 150000) )
		0
687	0		return phi_walk(x, a);
688
689	0		return phi_recurse(x, a);
690			}
691
692
693
694
695			/============================================================================/
696
697			#if 0
698			// TODO: setup with initial function. optimize. export.
699			IV phi_sum(UV x, UV a, int sign) {
700			IV sum = 0;
701			//if (x < 1) return 0;
702			for (; a > 0; a--) {
703			UV p = nth_prime(a);
704			if (x <= p) {
705			return sum + (long)sign;
706			}
707			sum += p * phi_sum(x / p, a-1, -sign);
708			}
709			if (sign > 0) sum += (x(x+1))/2; else sum -= (x(x+1))/2;
710			return sum;
711			}
712			#endif