File Coverage

prime_counts.c

Criterion	Covered	Total	%
statement	263	277	94.9
branch	272	344	79.0
condition			n/a
subroutine			n/a
pod			n/a
total	535	621	86.1

line	stmt	bran	code
1			#include
2			#include
3			#include
4
5			#define FUNC_popcnt 1
6			#define FUNC_isqrt 1
7			#include "ptypes.h"
8			#include "sieve.h"
9			#include "cache.h"
10			#include "lmo.h"
11			#include "constants.h"
12			#include "prime_counts.h"
13			#include "util.h"
14			#include "real.h"
15			#include "mathl.h"
16
17			#if defined(__GNUC__)
18			#define word_unaligned(m,wordsize) ((uintptr_t)m & (wordsize-1))
19			#else /* uintptr_t is part of C99 */
20			#define word_unaligned(m,wordsize) ((unsigned int)m & (wordsize-1))
21			#endif
22
23			/* TODO: This data is duplicated in util.c. */
24			static const unsigned char prime_sieve30[] =
25			{0x01,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0xb0,0xe1,0x0c,0x15,0x59,0x12,
26			0x61,0x19,0xf3,0x2c,0x2c,0xc4,0x22,0xa6,0x5a,0x95,0x98,0x6d,0x42,0x87,0xe1,
27			0x59,0xa9,0xa9,0x1c,0x52,0xd2,0x21,0xd5,0xb3,0xaa,0x26,0x5c,0x0f,0x60,0xfc,
28			0xab,0x5e,0x07,0xd1,0x02,0xbb,0x16,0x99,0x09,0xec,0xc5,0x47,0xb3,0xd4,0xc5,
29			0xba,0xee,0x40,0xab,0x73,0x3e,0x85,0x4c,0x37,0x43,0x73,0xb0,0xde,0xa7,0x8e,
30			0x8e,0x64,0x3e,0xe8,0x10,0xab,0x69,0xe5,0xf7,0x1a,0x7c,0x73,0xb9,0x8d,0x04,
31			0x51,0x9a,0x6d,0x70,0xa7,0x78,0x2d,0x6d,0x27,0x7e,0x9a,0xd9,0x1c,0x5f,0xee,
32			0xc7,0x38,0xd9,0xc3,0x7e,0x14,0x66,0x72,0xae,0x77,0xc1,0xdb,0x0c,0xcc,0xb2,
33			0xa5,0x74,0xe3,0x58,0xd5,0x4b,0xa7,0xb3,0xb1,0xd9,0x09,0xe6,0x7d,0x23,0x7c,
34			0x3c,0xd3,0x0e,0xc7,0xfd,0x4a,0x32,0x32,0xfd,0x4d,0xb5,0x6b,0xf3,0xa8,0xb3,
35			0x85,0xcf,0xbc,0xf4,0x0e,0x34,0xbb,0x93,0xdb,0x07,0xe6,0xfe,0x6a,0x57,0xa3,
36			0x8c,0x15,0x72,0xdb,0x69,0xd4,0xaf,0x59,0xdd,0xe1,0x3b,0x2e,0xb7,0xf9,0x2b,
37			0xc5,0xd0,0x8b,0x63,0xf8,0x95,0xfa,0x77,0x40,0x97,0xea,0xd1,0x9f,0xaa,0x1c,
38			0x48,0xae,0x67,0xf7,0xeb,0x79,0xa5,0x55,0xba,0xb2,0xb6,0x8f,0xd8,0x2d,0x6c,
39			0x2a,0x35,0x54,0xfd,0x7c,0x9e,0xfa,0xdb,0x31,0x78,0xdd,0x3d,0x56,0x52,0xe7,
40			0x73,0xb2,0x87,0x2e,0x76,0xe9,0x4f,0xa8,0x38,0x9d,0x5d,0x3f,0xcb,0xdb,0xad,
41			0x51,0xa5,0xbf,0xcd,0x72,0xde,0xf7,0xbc,0xcb,0x49,0x2d,0x49,0x26,0xe6,0x1e,
42			0x9f,0x98,0xe5,0xc6,0x9f,0x2f,0xbb,0x85,0x6b,0x65,0xf6,0x77,0x7c,0x57,0x8b,
43			0xaa,0xef,0xd8,0x5e,0xa2,0x97,0xe1,0xdc,0x37,0xcd,0x1f,0xe6,0xfc,0xbb,0x8c,
44			0xb7,0x4e,0xc7,0x3c,0x19,0xd5,0xa8,0x9e,0x67,0x4a,0xe3,0xf5,0x97,0x3a,0x7e,
45			0x70,0x53,0xfd,0xd6,0xe5,0xb8,0x1c,0x6b,0xee,0xb1,0x9b,0xd1,0xeb,0x34,0xc2,
46			0x23,0xeb,0x3a,0xf9,0xef,0x16,0xd6,0x4e,0x7d,0x16,0xcf,0xb8,0x1c,0xcb,0xe6,
47			0x3c,0xda,0xf5,0xcf};
48			#define NPRIME_SIEVE30 (sizeof(prime_sieve30)/sizeof(prime_sieve30[0]))
49
50			static const unsigned short primes_small[] =
51			{0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
52			101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
53			193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
54			293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
55			409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499};
56			#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
57
58
59			static const unsigned char byte_zeros[256] =
60			{8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
61			7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
62			7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
63			6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
64			7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
65			6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
66			6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
67			5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0};
68	65435		static UV count_zero_bits(const unsigned char* m, UV nbytes)
69			{
70	65435		UV count = 0;
71			#if BITS_PER_WORD == 64
72	65435	100	if (nbytes >= 16) {
73	157219	100	while ( word_unaligned(m,sizeof(UV)) && nbytes--)
		50
74	135769		count += byte_zeros[*m++];
75	21450	50	if (nbytes >= 8) {
76	21450		UV* wordptr = (UV*)m;
77	21450		UV nwords = nbytes / 8;
78	21450		UV nzeros = nwords * 64;
79	21450		m += nwords * 8;
80	21450		nbytes %= 8;
81	304072	100	while (nwords--)
82	282622		nzeros -= popcnt(*wordptr++);
83	21450		count += nzeros;
84			}
85			}
86			#endif
87	349733	100	while (nbytes--)
88	284298		count += byte_zeros[*m++];
89	65435		return count;
90			}
91
92			/* Given a sieve of size nbytes, walk it counting zeros (primes) until:
93			*
94			* (1) we counted them all: return the count, which will be less than maxcount.
95			*
96			* (2) we hit maxcount: set position to the index of the maxcount'th prime
97			* and return count (which will be equal to maxcount).
98			*/
99	47		static UV count_segment_maxcount(const unsigned char* sieve, UV base, UV nbytes, UV maxcount, UV* pos)
100			{
101	47		UV count = 0;
102	47		UV byte = 0;
103	47		const unsigned char* sieveptr = sieve;
104	47		const unsigned char* maxsieve = sieve + nbytes;
105
106	47	50	MPUassert(sieve != 0, "count_segment_maxcount incorrect args");
107	47	50	MPUassert(pos != 0, "count_segment_maxcount incorrect args");
108	47		*pos = 0;
109	47	50	if ( (nbytes == 0) \|\| (maxcount == 0) )
		50
110	0		return 0;
111
112			/* Do fixed-length word counts to start, with possible overcounting */
113	205	100	while ((count+64) < maxcount && sieveptr < maxsieve) {
		50
114	158		UV top = base + 3*maxcount;
115	158	100	UV div = (top < 8000) ? 8 : /* 8 cannot overcount */
		100
		100
116			(top < 1000000) ? 4 :
117			(top < 10000000) ? 3 : 2;
118	158		UV minbytes = (maxcount-count)/div;
119	158	50	if (minbytes > (UV)(maxsieve-sieveptr)) minbytes = maxsieve-sieveptr;
120	158		count += count_zero_bits(sieveptr, minbytes);
121	158		sieveptr += minbytes;
122			}
123			/* Count until we reach the end or >= maxcount */
124	750	50	while ( (sieveptr < maxsieve) && (count < maxcount) )
		100
125	703		count += byte_zeros[*sieveptr++];
126			/* If we went too far, back up. */
127	94	100	while (count >= maxcount)
128	47		count -= byte_zeros[*--sieveptr];
129			/* We counted this many bytes */
130	47		byte = sieveptr - sieve;
131
132	47	50	MPUassert(count < maxcount, "count_segment_maxcount wrong count");
133
134	47	50	if (byte == nbytes)
135	0		return count;
136
137			/* The result is somewhere in the next byte */
138	506	50	START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, byte30+1, nbytes30-1)
		50
		100
		50
		50
139	103	100	if (++count == maxcount) { *pos = p; return count; }
140	0		END_DO_FOR_EACH_SIEVE_PRIME;
141
142	0		MPUassert(0, "count_segment_maxcount failure");
143			return 0;
144			}
145
146			/* Given a sieve of size nbytes, counting zeros (primes) but excluding the
147			* areas outside lowp and highp.
148			*/
149	80207		static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, UV highp)
150			{
151			UV count, hi_d, lo_d, lo_m;
152
153	80207	50	MPUassert( sieve != 0, "count_segment_ranged incorrect args");
154	80207	50	if (nbytes == 0) return 0;
155
156	80207		count = 0;
157	80207		hi_d = highp/30;
158
159	80207	50	if (hi_d >= nbytes) {
160	0		hi_d = nbytes-1;
161	0		highp = hi_d*30+29;
162			}
163
164	80207	50	if (highp < lowp)
165	0		return 0;
166
167			#if 0
168			/* Dead simple way */
169			START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp)
170			count++;
171			END_DO_FOR_EACH_SIEVE_PRIME;
172			return count;
173			#endif
174
175	80207		lo_d = lowp/30;
176	80207		lo_m = lowp - lo_d*30;
177			/* Count first fragment */
178	80207	100	if (lo_m > 1) {
179	77569		UV upper = (highp <= (lo_d30+29)) ? highp : (lo_d30+29);
180	671592	50	START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, upper)
		100
		100
		100
		100
181	502313		count++;
182	77569		END_DO_FOR_EACH_SIEVE_PRIME;
183	77569		lowp = upper+2;
184	77569		lo_d = lowp/30;
185			}
186	80207	100	if (highp < lowp)
187	9377		return count;
188
189			/* Count bytes in the middle */
190			{
191	70830		UV hi_m = highp - hi_d*30;
192	70830		UV count_bytes = hi_d - lo_d + (hi_m == 29);
193	70830	100	if (count_bytes > 0) {
194	65277		count += count_zero_bits(sieve+lo_d, count_bytes);
195	65277		lowp += 30*count_bytes;
196			}
197			}
198	70830	100	if (highp < lowp)
199	4638		return count;
200
201			/* Count last fragment */
202	1508344	50	START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp)
		100
		100
		100
		100
203	170140		count++;
204	66192		END_DO_FOR_EACH_SIEVE_PRIME;
205
206	66192		return count;
207			}
208
209
210			/*
211			* The pi(x) prime count functions. prime_count(x) gives an exact number,
212			* but requires determining all the primes up to x, so will be much slower.
213			*
214			* prime_count_lower(x) and prime_count_upper(x) give lower and upper limits,
215			* which will bound the exact value. These bounds should be fairly tight.
216			*
217			* pi_upper(x) - pi(x) pi_lower(x) - pi(x)
218			* < 10 for x < 5_371 < 10 for x < 9_437
219			* < 50 for x < 295_816 < 50 for x < 136_993
220			* < 100 for x < 1_761_655 < 100 for x < 909_911
221			* < 200 for x < 9_987_821 < 200 for x < 8_787_901
222			* < 400 for x < 34_762_891 < 400 for x < 30_332_723
223			* < 1000 for x < 372_748_528 < 1000 for x < 233_000_533
224			* < 5000 for x < 1_882_595_905 < 5000 for x < over 4300M
225			*
226			* The average of the upper and lower bounds is within 9 for all x < 15809, and
227			* within 50 for all x < 1_763_367.
228			*
229			* It is common to use the following Chebyshev inequality for x >= 17:
230			* 1x/logx <-> 1.25506x/logx
231			* but this gives terribly loose bounds.
232			*
233			* Rosser and Schoenfeld's bound for x >= 67 of
234			* x/(logx-1/2) <-> x/(logx-3/2)
235			* is much tighter. These bounds can be tightened even more.
236			*
237			* The formulas of Dusart for higher x are better yet. I recommend the paper
238			* by Burde for further information. Dusart's thesis is also a good resource.
239			*
240			* I have tweaked the bounds formulas for small (under 70_000M) numbers so they
241			* are tighter. These bounds are verified via trial. The Dusart bounds
242			* (1.8 and 2.51) are used for larger numbers since those are proven.
243			*
244			*/
245
246			#include "prime_count_tables.h"
247
248	95477		UV segment_prime_count(UV low, UV high)
249			{
250			const unsigned char* cache_sieve;
251			unsigned char* segment;
252			UV segment_size, low_d, high_d;
253	95477		UV count = 0;
254
255	95477	100	if ((low <= 2) && (high >= 2)) count++;
		100
256	95477	100	if ((low <= 3) && (high >= 3)) count++;
		100
257	95477	100	if ((low <= 5) && (high >= 5)) count++;
		100
258	95477	100	if (low < 7) low = 7;
259
260	95477	100	if (low > high) return count;
261
262			#if !defined(BENCH_SEGCOUNT)
263	80207	100	if (low == 7 && high <= 30*NPRIME_SIEVE30) {
		100
264	76353		count += count_segment_ranged(prime_sieve30, NPRIME_SIEVE30, low, high);
265	76353		return count;
266			}
267
268			/* If we have sparse prime count tables, use them here. These will adjust
269			* 'low' and 'count' appropriately for a value slightly less than ours.
270			* This should leave just a small amount of sieving left. They stop at
271			* some point, e.g. 3000M, so we'll get the answer to that point then have
272			* to sieve all the rest. We should be using LMO or Lehmer much earlier. */
273			#ifdef APPLY_TABLES
274	409860	100	APPLY_TABLES
		50
		100
		100
		50
		100
		100
		50
		100
		100
		50
		100
		100
		50
		50
		100
		50
		0
		0
		0
		0
		0
275			#endif
276			#endif
277
278	3854		low_d = low/30;
279	3854		high_d = high/30;
280
281			/* Count full bytes only -- no fragments from primary cache */
282	3854		segment_size = get_prime_cache(0, &cache_sieve) / 30;
283	3854	100	if (segment_size < high_d) {
284			/* Expand sieve to sqrt(n) */
285	285	50	UV endp = (high_d >= (UV_MAX/30)) ? UV_MAX-2 : 30*high_d+29;
286	285		UV newsize = (UV)isqrt(endp)+1;
287	285	100	if (newsize > 2642245) newsize = 2642245; /* Limit to icbrt(2^64) */
288			release_prime_cache(cache_sieve);
289	285		segment_size = get_prime_cache( newsize, &cache_sieve) / 30;
290			}
291
292	3854	50	if ( (segment_size > 0) && (low_d <= segment_size) ) {
		100
293			/* Count all the primes in the primary cache in our range */
294	3569		count += count_segment_ranged(cache_sieve, segment_size, low, high);
295
296	3569	50	if (high_d < segment_size) {
297			release_prime_cache(cache_sieve);
298	3569		return count;
299			}
300
301	0		low_d = segment_size;
302	0	0	if (30low_d > low) low = 30low_d;
303			}
304			release_prime_cache(cache_sieve);
305
306			/* More primes needed. Repeatedly segment sieve. */
307			{
308	285		void* ctx = start_segment_primes(low, high, &segment);
309			UV seg_base, seg_low, seg_high;
310	570	100	while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
311	285		segment_size = seg_high/30 - seg_low/30 + 1;
312	285		count += count_segment_ranged(segment, segment_size, seg_low-seg_base, seg_high-seg_base);
313			}
314	285		end_segment_primes(ctx);
315			}
316
317	285		return count;
318			}
319
320	150		UV prime_count_range(UV lo, UV hi)
321			{
322	150	50	if (lo > hi \|\| hi < 2)
		100
323	8		return 0;
324
325			#if defined(BENCH_SEGCOUNT)
326			return segment_prime_count(lo, hi);
327			#endif
328
329			/* We use table acceleration so this is preferable for small inputs */
330	142	100	if (hi < _MPU_LMO_CROSSOVER) return segment_prime_count(lo, hi);
331
332			{ /* Rough empirical threshold for when segment faster than LMO */
333	17		UV range_threshold = hi / (isqrt(hi)/200);
334	17	100	if ( (hi-lo+1) < range_threshold )
335	12		return segment_prime_count(lo, hi);
336			}
337	5	50	return LMO_prime_count(hi) - ((lo < 2) ? 0 : LMO_prime_count(lo-1));
338			}
339
340	69634		UV prime_count(UV n)
341			{
342	69634	50	if (n < 2) return 0;
343
344			/* We use table acceleration so this is preferable for small inputs */
345	69634	100	if (n < _MPU_LMO_CROSSOVER) return segment_prime_count(0, n);
346
347	782		return LMO_prime_count(n);
348			}
349
350	9623		UV prime_count_approx(UV n)
351			{
352	9623	100	if (n < 3000000) return segment_prime_count(2, n);
353	214		return (UV) (RiemannR((long double) n, 1e-6) + 0.5);
354			}
355
356
357	7016		UV prime_count_lower(UV n)
358			{
359			long double fn, fl1, fl2, lower, a;
360
361	7016	100	if (n < 33000) return segment_prime_count(2, n);
362
363	413		fn = (long double) n;
364	413		fl1 = logl(n);
365	413		fl2 = fl1 * fl1;
366
367			/* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.6
368			* show variations of this. */
369
370	413	100	if (n <= 300070) { /* Quite accurate and avoids calling Li for speed. */
371			/* Based on Axler 2022, page 9, Corollary 5.1 */
372	263	100	a = (n < 69720) ? 905 :
		50
		100
		100
		50
373			(n < 70120) ? 961 :
374			(n < 88800) ? 918.2 :
375			(n < 176000) ? 887.7 :
376			(n < 299270) ? 839.46 :
377			846.66; /* Good to 300071 */
378	263		lower = fn / (fl1 - 1 - 1/fl1 - 2.975666/fl2 - 13.024334/(fl1fl2) + a/(fl2fl2));
379	150	100	} else if (n < UVCONST(4000000000)) {
380			/* Loose enough that FP differences in Li(n) should be ok. */
381	130		a = (n < 88783) ? 4.0L
382	260	50	: (n < 300000) ? -3.0L
383	260	50	: (n < 303000) ? 5.0L
384	260	50	: (n < 1100000) ? -7.0L
385	248	100	: (n < 4500000) ? -37.0L
386	225	100	: (n < 10200000) ? -70.0L
387	199	100	: (n < 36900000) ? -53.0L
388	159	100	: (n < 38100000) ? -29.0L
389	67	50	: -84.0L;
390	130		lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 2.50L/fl1 + a/fl2);
391	20	100	} else if (fn < 1e19) { /* Büthe 2015 1.9 1511.02032v1.pdf */
392	18		lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 3.88L/fl1 + 27.57L/fl2);
393			} else { /* Büthe 2014 v3 7.2 1410.7015v3.pdf */
394	2		lower = Li(fn) - fl1*sqrtl(fn)/25.132741228718345907701147L;
395			}
396	413		return (UV) ceill(lower);
397			}
398
399			typedef struct {
400			UV thresh;
401			float aval;
402			} thresh_t;
403
404			static const thresh_t _upper_thresh[] = {
405			{ 59000, 2.48f },
406			{ 355991, 2.54f },
407			{ 3550000, 2.51f },
408			{ 3560000, 2.49f },
409			{ 5000000, 2.48f },
410			{ 8000000, 2.47f },
411			{ 13000000, 2.46f },
412			{ 18000000, 2.45f },
413			{ 31000000, 2.44f },
414			{ 41000000, 2.43f },
415			{ 48000000, 2.42f },
416			{ 119000000, 2.41f },
417			{ 182000000, 2.40f },
418			{ 192000000, 2.395f },
419			{ 213000000, 2.390f },
420			{ 271000000, 2.385f },
421			{ 322000000, 2.380f },
422			{ 400000000, 2.375f },
423			{ 510000000, 2.370f },
424			{ 682000000, 2.367f },
425			{ UVCONST(2953652287), 2.362f }
426			};
427			#define NUPPER_THRESH (sizeof(_upper_thresh)/sizeof(_upper_thresh[0]))
428
429	7455		UV prime_count_upper(UV n)
430			{
431			int i;
432			long double fn, fl1, fl2, upper, a;
433
434	7455	100	if (n < 33000) return segment_prime_count(2, n);
435
436	815		fn = (long double) n;
437	815		fl1 = logl(n);
438	815		fl2 = fl1 * fl1;
439
440			/* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.5
441			*
442			* upper = fn/(fl1-1.0L-1.0L/fl1-3.35L/fl2-12.65L/(fl2fl1)-89.6L/(fl2fl2));
443			* return (UV) floorl(upper);
444			*
445			* Axler 2022: https://arxiv.org/pdf/2203.05917.pdf (v4 2022) improves this.
446			*/
447
448	815	100	if (BITS_PER_WORD == 32 \|\| fn <= 821800000.0) { /* Dusart 2010, page 2 */
449	3493	50	for (i = 0; i < (int)NUPPER_THRESH; i++)
450	3493	100	if (n < _upper_thresh[i].thresh)
451	770		break;
452	770	50	a = (i < (int)NUPPER_THRESH) ? _upper_thresh[i].aval : 2.334L;
453	770		upper = fn/fl1 * (1.0L + 1.0L/fl1 + a/fl2);
454	45	100	} else if (fn < 1e19) { /* Büthe 2015 1.10 Skewes number lower limit */
455	43		a = (fn < 1100000000.0) ? 0.032 /* Empirical */
456	43	100	: (fn < 10010000000.0) ? 0.027 /* Empirical */
		100
		100
457			: (fn < 101260000000.0) ? 0.021 /* Empirical */
458			: 0.0;
459	43		upper = Li(fn) - a * fl1*sqrtl(fn)/25.132741228718345907701147L;
460			} else { /* Büthe 2014 7.4 */
461	2		upper = Li(fn) + fl1*sqrtl(fn)/25.132741228718345907701147L;
462			}
463	815		return (UV) floorl(upper);
464			}
465
466	623		static void simple_nth_limits(UV lo, UV hi, long double n, long double logn, long double loglogn) {
467	623	100	const long double a = (n < 228) ? .6483 : (n < 948) ? .8032 : (n < 2195) ? .8800 : (n < 39017) ? .9019 : .9484;
		100
		100
		100
468	623		lo = n (logn + loglogn - 1.0 + ((loglogn-2.10)/logn));
469	623		hi = n (logn + loglogn - a);
470	623	50	if (hi < lo) *hi = MPU_MAX_PRIME;
471	623		}
472
473			/* The nth prime will be less or equal to this number */
474	630		UV nth_prime_upper(UV n)
475			{
476			long double fn, flogn, flog2n, upper, c, d;
477
478	630	100	if (n < NPRIMES_SMALL)
479	211		return primes_small[n];
480	419	100	if (n >= MPU_MAX_PRIME_IDX)
481	1	50	return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
482
483	418		fn = (long double) n;
484	418		flogn = logl(n);
485	418		flog2n = logl(flogn); /* Note distinction between log_2(n) and log^2(n) */
486
487			/* Binary search on prime count lower. Good but quite slow. */
488	418	100	if (n < 15360) {
489			UV lo,hi;
490	306		simple_nth_limits(&lo, &hi, fn, flogn, flog2n);
491	2277	100	while (lo < hi) {
492	1971		UV mid = lo + (hi-lo)/2;
493	1971	100	if (prime_count_lower(mid) < n) lo = mid+1;
494	957		else hi = mid;
495			}
496	306		return lo;
497			}
498
499			/* See: Axler 2013, Dusart 2010 */
500			/* Axler 2017: http://arxiv.org/pdf/1706.03651.pdf */
501
502	112	100	if (n >= 46254381) { c = 2.00; d = 10.667; } /* Axler 2017 Cor 1.2 */
503	94	100	else if (n >= 8009824) { c = 2.00; d = 10.273; } /* Axler 2013 Kor G */
504			/* This is about 3x better than Dusart (2010) for 688382-8009823:
505			*
506			* else if (n >= 688382) { c = 2.30; d = 0.5730; }
507			*
508			* but we can split the range and get another 2x improvement in MSE.
509			*/
510	76	100	else if (n >= 5450000) { c = 2.00; d = 10.1335; } /5450-8009 /
511	64	100	else if (n >= 3906280) { c = 1.67; d = 20.2675; } /3906-5450 /
512	61	100	else if (n >= 2110840) { c = 2.51; d = -5.5714; } /2110-3906 /
513	56	100	else if (n >= 876700) { c = 2.49; d = -4.5129; } /* 877-2110 */
514	49	100	else if (n >= 688382) { c = 3.31; d = -26.3858; } /* 688-877 */
515			/* Use the Axler framework to get good bounds for smaller inputs. */
516	44	50	else if (n >= 575750) { c =-0.79; d = 83.5215; } /* 580-688 */
517	44	50	else if (n >= 467650) { c = 0.93; d = 37.1597; } /* 467-580 */
518	44	100	else if (n >= 382440) { c = 2.92; d = -15.4768; } /* 382-467 */
519	40	100	else if (n >= 301130) { c = 5.92; d = -91.3415; } /* 301-382 */
520	39	100	else if (n >= 138630) { c = 2.01; d = 7.2842; } /* 138-301 */
521	36	100	else if (n >= 85820) { c = 2.07; d = 5.2103; } /* 86-138 */
522	23	100	else if (n >= 39016) { c = 2.76; d = -11.5918; } /* 39- 86 */
523	6	50	else if (n >= 31490) { c = 1.49; d = 15.1821; } /* 31- 39 */
524	6	100	else if (n >= 25070) { c =11.89; d =-197.8951; } /* 25- 31 */
525	5	50	else if (n >= 15359) { c = 4.80; d = -51.5928; } /* 15- 25 */
526	0		else { c = 3.92; d = -33.3994; } /* 0- 15 */
527
528	112		upper = fn * ( flogn + flog2n - 1.0 + ((flog2n-c)/flogn)
529	112		- (flog2nflog2n-6flog2n+d)/(2flognflogn) );
530
531	112	50	if (upper >= (long double)UV_MAX) {
532	0	0	if (n <= MPU_MAX_PRIME_IDX) return MPU_MAX_PRIME;
533	0		croak("nth_prime_upper(%"UVuf") overflow", n);
534			}
535
536	112		return (UV) floorl(upper);
537			}
538
539			/* The nth prime will be greater than or equal to this number */
540	508		UV nth_prime_lower(UV n)
541			{
542			double fn, flogn, flog2n;
543			UV plower;
544
545	508	100	if (n < NPRIMES_SMALL)
546	139		return primes_small[n];
547	369	100	if (n >= MPU_MAX_PRIME_IDX)
548	2	50	return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
549
550	367		fn = (double) n;
551	367		flogn = log(n);
552	367		flog2n = log(flogn);
553
554			/* For small values, do a binary search on the inverse prime count */
555	367	100	if (n < 2000000) {
556			UV lo,hi;
557	317		simple_nth_limits(&lo, &hi, fn, flogn, flog2n);
558	2531	100	while (lo < hi) {
559	2214		UV mid = lo + (hi-lo)/2;
560	2214	100	if (prime_count_upper(mid) < n) lo = mid+1;
561	1174		else hi = mid;
562			}
563	317		return lo;
564			}
565
566			{ /* Axler 2017 http://arxiv.org/pdf/1706.03651.pdf Corollary 1.4 */
567	50	100	double b1 = (n < 56000000) ? 11.200 : 11.50800000002;
568	50		double lower = fn * (flogn + flog2n-1.0 + ((flog2n-2.00)/flogn) - ((flog2nflog2n-6flog2n+b1)/(2flognflogn)));
569	50		plower = (UV) ceill(lower);
570			}
571	50		return plower < MPU_MAX_PRIME ? plower : MPU_MAX_PRIME;
572			}
573
574	26		UV nth_prime_approx(UV n)
575			{
576	26	100	return (n < NPRIMES_SMALL) ? primes_small[n] : inverse_R(n);
577			}
578
579
580	222		UV nth_prime(UV n)
581			{
582			const unsigned char* cache_sieve;
583			unsigned char* segment;
584			UV upper_limit, segbase, segment_size, p, count, target;
585
586			/* If very small, return the table entry */
587	222	100	if (n < NPRIMES_SMALL)
588	174		return primes_small[n];
589	48	50	if (n >= MPU_MAX_PRIME_IDX)
590	0	0	return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
591
592			/* Determine a bound on the nth prime. We know it comes before this. */
593	48		upper_limit = nth_prime_upper(n);
594	48	50	MPUassert(upper_limit > 0, "nth_prime got an upper limit of 0");
595	48		p = count = 0;
596	48		target = n-3;
597
598			/* For relatively small values, generate a sieve and count the results.
599			*
600			* For larger values, compute an approximate low estimate, use our fast
601			* prime count, then segment sieve forwards or backwards for the rest.
602			*/
603	48	100	if (upper_limit <= get_prime_cache(0, 0) \|\| upper_limit <= 32102430) {
		100
604			/* Generate a sieve and count. */
605	31		segment_size = get_prime_cache(upper_limit, &cache_sieve) / 30;
606			/* Count up everything in the cached sieve. */
607	31	50	if (segment_size > 0)
608	31		count += count_segment_maxcount(cache_sieve, 0, segment_size, target, &p);
609			release_prime_cache(cache_sieve);
610			} else {
611			/* A binary search on RiemannR is nice, but ends up either often being
612			* being higher (requiring going backwards) or biased and then far too
613			* low. Using the inverse Li is easier and more consistent. */
614	17		UV lower_limit = inverse_li(n);
615			/* For even better performance, add in half the usual correction, which
616			* will get us even closer, so even less sieving required. However, it
617			* is now possible to get a result higher than the value, so we'll need
618			* to handle that case. It still ends up being a better deal than R,
619			* given that we don't have a fast backward sieve. */
620	17		lower_limit += inverse_li(isqrt(n))/4;
621	17		segment_size = lower_limit / 30;
622	17		lower_limit = 30 * segment_size - 1;
623	17		count = prime_count(lower_limit);
624
625			/* printf("We've estimated %lu too %s.\n", (count>n)?count-n:n-count, (count>n)?"FAR":"little"); */
626			/* printf("Our limit %lu %s a prime\n", lower_limit, is_prime(lower_limit) ? "is" : "is not"); */
627
628	17	100	if (count >= n) { /* Too far. Walk backwards */
629	1	50	if (is_prime(lower_limit)) count--;
630	2	100	for (p = 0; p <= (count-n); p++)
631	1		lower_limit = prev_prime(lower_limit);
632	1		return lower_limit;
633			}
634	16		count -= 3;
635
636			/* Make sure the segment siever won't have to keep resieving. */
637	16		prime_precalc(isqrt(upper_limit));
638			}
639
640	47	100	if (count == target)
641	31		return p;
642
643			/* Start segment sieving. Get memory to sieve into. */
644	16		segbase = segment_size;
645	16		segment = get_prime_segment(&segment_size);
646
647	32	100	while (count < target) {
648			/* Limit the segment size if we know the answer comes earlier */
649	16	100	if ( (30*(segbase+segment_size)+29) > upper_limit )
650	15		segment_size = (upper_limit - segbase*30 + 30) / 30;
651
652			/* Do the actual sieving in the range */
653	16		sieve_segment(segment, segbase, segbase + segment_size-1);
654
655			/* Count up everything in this segment */
656	16		count += count_segment_maxcount(segment, 30*segbase, segment_size, target-count, &p);
657
658	16	50	if (count < target)
659	0		segbase += segment_size;
660			}
661	16		release_prime_segment(segment);
662	16	50	MPUassert(count == target, "nth_prime got incorrect count");
663	16		return ( (segbase*30) + p );
664			}
665
666
667
668			/******************************************************************************/
669			/* MISC */
670			/******************************************************************************/
671
672
673	788		double ramanujan_axler(long double n, long double c, long double d) {
674	788		long double res, U, c1, c2, log2 = logl(2), logn = logl(n), loglogn = logl(logn);
675
676	788		c1 = 2log2log2 + log2 + c;
677	788		c2 = log2log2log2 + 2log2log2 + d;
678
679	788		U = (log2 * lognloglognloglogn - c1lognloglogn + c2logn - log2log2loglogn + log2log2log2 + log2log2)
680	788		/ (lognlognlognlogn + lognlognlognloglogn - lognlognlognlog2 - lognlogn*log2);
681
682	788		res = 2n (1.0L + log2/logn - (log2loglogn - log2log2 - log2) / (logn*logn) + U);
683	788		return res;
684			}