File Coverage

prime_counts.c
Criterion Covered Total %
statement 263 277 94.9
branch 272 344 79.0
condition n/a
subroutine n/a
pod n/a
total 535 621 86.1


line stmt bran cond sub pod time code
1             #include
2             #include
3             #include
4              
5             #define FUNC_popcnt 1
6             #define FUNC_isqrt 1
7             #include "ptypes.h"
8             #include "sieve.h"
9             #include "cache.h"
10             #include "lmo.h"
11             #include "constants.h"
12             #include "prime_counts.h"
13             #include "util.h"
14             #include "real.h"
15             #include "mathl.h"
16              
17             #if defined(__GNUC__)
18             #define word_unaligned(m,wordsize) ((uintptr_t)m & (wordsize-1))
19             #else /* uintptr_t is part of C99 */
20             #define word_unaligned(m,wordsize) ((unsigned int)m & (wordsize-1))
21             #endif
22              
23             /* TODO: This data is duplicated in util.c. */
24             static const unsigned char prime_sieve30[] =
25             {0x01,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0xb0,0xe1,0x0c,0x15,0x59,0x12,
26             0x61,0x19,0xf3,0x2c,0x2c,0xc4,0x22,0xa6,0x5a,0x95,0x98,0x6d,0x42,0x87,0xe1,
27             0x59,0xa9,0xa9,0x1c,0x52,0xd2,0x21,0xd5,0xb3,0xaa,0x26,0x5c,0x0f,0x60,0xfc,
28             0xab,0x5e,0x07,0xd1,0x02,0xbb,0x16,0x99,0x09,0xec,0xc5,0x47,0xb3,0xd4,0xc5,
29             0xba,0xee,0x40,0xab,0x73,0x3e,0x85,0x4c,0x37,0x43,0x73,0xb0,0xde,0xa7,0x8e,
30             0x8e,0x64,0x3e,0xe8,0x10,0xab,0x69,0xe5,0xf7,0x1a,0x7c,0x73,0xb9,0x8d,0x04,
31             0x51,0x9a,0x6d,0x70,0xa7,0x78,0x2d,0x6d,0x27,0x7e,0x9a,0xd9,0x1c,0x5f,0xee,
32             0xc7,0x38,0xd9,0xc3,0x7e,0x14,0x66,0x72,0xae,0x77,0xc1,0xdb,0x0c,0xcc,0xb2,
33             0xa5,0x74,0xe3,0x58,0xd5,0x4b,0xa7,0xb3,0xb1,0xd9,0x09,0xe6,0x7d,0x23,0x7c,
34             0x3c,0xd3,0x0e,0xc7,0xfd,0x4a,0x32,0x32,0xfd,0x4d,0xb5,0x6b,0xf3,0xa8,0xb3,
35             0x85,0xcf,0xbc,0xf4,0x0e,0x34,0xbb,0x93,0xdb,0x07,0xe6,0xfe,0x6a,0x57,0xa3,
36             0x8c,0x15,0x72,0xdb,0x69,0xd4,0xaf,0x59,0xdd,0xe1,0x3b,0x2e,0xb7,0xf9,0x2b,
37             0xc5,0xd0,0x8b,0x63,0xf8,0x95,0xfa,0x77,0x40,0x97,0xea,0xd1,0x9f,0xaa,0x1c,
38             0x48,0xae,0x67,0xf7,0xeb,0x79,0xa5,0x55,0xba,0xb2,0xb6,0x8f,0xd8,0x2d,0x6c,
39             0x2a,0x35,0x54,0xfd,0x7c,0x9e,0xfa,0xdb,0x31,0x78,0xdd,0x3d,0x56,0x52,0xe7,
40             0x73,0xb2,0x87,0x2e,0x76,0xe9,0x4f,0xa8,0x38,0x9d,0x5d,0x3f,0xcb,0xdb,0xad,
41             0x51,0xa5,0xbf,0xcd,0x72,0xde,0xf7,0xbc,0xcb,0x49,0x2d,0x49,0x26,0xe6,0x1e,
42             0x9f,0x98,0xe5,0xc6,0x9f,0x2f,0xbb,0x85,0x6b,0x65,0xf6,0x77,0x7c,0x57,0x8b,
43             0xaa,0xef,0xd8,0x5e,0xa2,0x97,0xe1,0xdc,0x37,0xcd,0x1f,0xe6,0xfc,0xbb,0x8c,
44             0xb7,0x4e,0xc7,0x3c,0x19,0xd5,0xa8,0x9e,0x67,0x4a,0xe3,0xf5,0x97,0x3a,0x7e,
45             0x70,0x53,0xfd,0xd6,0xe5,0xb8,0x1c,0x6b,0xee,0xb1,0x9b,0xd1,0xeb,0x34,0xc2,
46             0x23,0xeb,0x3a,0xf9,0xef,0x16,0xd6,0x4e,0x7d,0x16,0xcf,0xb8,0x1c,0xcb,0xe6,
47             0x3c,0xda,0xf5,0xcf};
48             #define NPRIME_SIEVE30 (sizeof(prime_sieve30)/sizeof(prime_sieve30[0]))
49              
50             static const unsigned short primes_small[] =
51             {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
52             101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
53             193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
54             293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
55             409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499};
56             #define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
57              
58              
59             static const unsigned char byte_zeros[256] =
60             {8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
61             7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
62             7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
63             6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
64             7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
65             6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
66             6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
67             5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0};
68 65435           static UV count_zero_bits(const unsigned char* m, UV nbytes)
69             {
70 65435           UV count = 0;
71             #if BITS_PER_WORD == 64
72 65435 100         if (nbytes >= 16) {
73 157219 100         while ( word_unaligned(m,sizeof(UV)) && nbytes--)
    50          
74 135769           count += byte_zeros[*m++];
75 21450 50         if (nbytes >= 8) {
76 21450           UV* wordptr = (UV*)m;
77 21450           UV nwords = nbytes / 8;
78 21450           UV nzeros = nwords * 64;
79 21450           m += nwords * 8;
80 21450           nbytes %= 8;
81 304072 100         while (nwords--)
82 282622           nzeros -= popcnt(*wordptr++);
83 21450           count += nzeros;
84             }
85             }
86             #endif
87 349733 100         while (nbytes--)
88 284298           count += byte_zeros[*m++];
89 65435           return count;
90             }
91              
92             /* Given a sieve of size nbytes, walk it counting zeros (primes) until:
93             *
94             * (1) we counted them all: return the count, which will be less than maxcount.
95             *
96             * (2) we hit maxcount: set position to the index of the maxcount'th prime
97             * and return count (which will be equal to maxcount).
98             */
99 47           static UV count_segment_maxcount(const unsigned char* sieve, UV base, UV nbytes, UV maxcount, UV* pos)
100             {
101 47           UV count = 0;
102 47           UV byte = 0;
103 47           const unsigned char* sieveptr = sieve;
104 47           const unsigned char* maxsieve = sieve + nbytes;
105              
106 47 50         MPUassert(sieve != 0, "count_segment_maxcount incorrect args");
107 47 50         MPUassert(pos != 0, "count_segment_maxcount incorrect args");
108 47           *pos = 0;
109 47 50         if ( (nbytes == 0) || (maxcount == 0) )
    50          
110 0           return 0;
111              
112             /* Do fixed-length word counts to start, with possible overcounting */
113 205 100         while ((count+64) < maxcount && sieveptr < maxsieve) {
    50          
114 158           UV top = base + 3*maxcount;
115 158 100         UV div = (top < 8000) ? 8 : /* 8 cannot overcount */
    100          
    100          
116             (top < 1000000) ? 4 :
117             (top < 10000000) ? 3 : 2;
118 158           UV minbytes = (maxcount-count)/div;
119 158 50         if (minbytes > (UV)(maxsieve-sieveptr)) minbytes = maxsieve-sieveptr;
120 158           count += count_zero_bits(sieveptr, minbytes);
121 158           sieveptr += minbytes;
122             }
123             /* Count until we reach the end or >= maxcount */
124 750 50         while ( (sieveptr < maxsieve) && (count < maxcount) )
    100          
125 703           count += byte_zeros[*sieveptr++];
126             /* If we went too far, back up. */
127 94 100         while (count >= maxcount)
128 47           count -= byte_zeros[*--sieveptr];
129             /* We counted this many bytes */
130 47           byte = sieveptr - sieve;
131              
132 47 50         MPUassert(count < maxcount, "count_segment_maxcount wrong count");
133              
134 47 50         if (byte == nbytes)
135 0           return count;
136              
137             /* The result is somewhere in the next byte */
138 506 50         START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, byte*30+1, nbytes*30-1)
    50          
    100          
    50          
    50          
139 103 100         if (++count == maxcount) { *pos = p; return count; }
140 0           END_DO_FOR_EACH_SIEVE_PRIME;
141              
142 0           MPUassert(0, "count_segment_maxcount failure");
143             return 0;
144             }
145              
146             /* Given a sieve of size nbytes, counting zeros (primes) but excluding the
147             * areas outside lowp and highp.
148             */
149 80207           static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, UV highp)
150             {
151             UV count, hi_d, lo_d, lo_m;
152              
153 80207 50         MPUassert( sieve != 0, "count_segment_ranged incorrect args");
154 80207 50         if (nbytes == 0) return 0;
155              
156 80207           count = 0;
157 80207           hi_d = highp/30;
158              
159 80207 50         if (hi_d >= nbytes) {
160 0           hi_d = nbytes-1;
161 0           highp = hi_d*30+29;
162             }
163              
164 80207 50         if (highp < lowp)
165 0           return 0;
166              
167             #if 0
168             /* Dead simple way */
169             START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp)
170             count++;
171             END_DO_FOR_EACH_SIEVE_PRIME;
172             return count;
173             #endif
174              
175 80207           lo_d = lowp/30;
176 80207           lo_m = lowp - lo_d*30;
177             /* Count first fragment */
178 80207 100         if (lo_m > 1) {
179 77569           UV upper = (highp <= (lo_d*30+29)) ? highp : (lo_d*30+29);
180 671592 50         START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, upper)
    100          
    100          
    100          
    100          
181 502313           count++;
182 77569           END_DO_FOR_EACH_SIEVE_PRIME;
183 77569           lowp = upper+2;
184 77569           lo_d = lowp/30;
185             }
186 80207 100         if (highp < lowp)
187 9377           return count;
188              
189             /* Count bytes in the middle */
190             {
191 70830           UV hi_m = highp - hi_d*30;
192 70830           UV count_bytes = hi_d - lo_d + (hi_m == 29);
193 70830 100         if (count_bytes > 0) {
194 65277           count += count_zero_bits(sieve+lo_d, count_bytes);
195 65277           lowp += 30*count_bytes;
196             }
197             }
198 70830 100         if (highp < lowp)
199 4638           return count;
200              
201             /* Count last fragment */
202 1508344 50         START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp)
    100          
    100          
    100          
    100          
203 170140           count++;
204 66192           END_DO_FOR_EACH_SIEVE_PRIME;
205              
206 66192           return count;
207             }
208              
209              
210             /*
211             * The pi(x) prime count functions. prime_count(x) gives an exact number,
212             * but requires determining all the primes up to x, so will be much slower.
213             *
214             * prime_count_lower(x) and prime_count_upper(x) give lower and upper limits,
215             * which will bound the exact value. These bounds should be fairly tight.
216             *
217             * pi_upper(x) - pi(x) pi_lower(x) - pi(x)
218             * < 10 for x < 5_371 < 10 for x < 9_437
219             * < 50 for x < 295_816 < 50 for x < 136_993
220             * < 100 for x < 1_761_655 < 100 for x < 909_911
221             * < 200 for x < 9_987_821 < 200 for x < 8_787_901
222             * < 400 for x < 34_762_891 < 400 for x < 30_332_723
223             * < 1000 for x < 372_748_528 < 1000 for x < 233_000_533
224             * < 5000 for x < 1_882_595_905 < 5000 for x < over 4300M
225             *
226             * The average of the upper and lower bounds is within 9 for all x < 15809, and
227             * within 50 for all x < 1_763_367.
228             *
229             * It is common to use the following Chebyshev inequality for x >= 17:
230             * 1*x/logx <-> 1.25506*x/logx
231             * but this gives terribly loose bounds.
232             *
233             * Rosser and Schoenfeld's bound for x >= 67 of
234             * x/(logx-1/2) <-> x/(logx-3/2)
235             * is much tighter. These bounds can be tightened even more.
236             *
237             * The formulas of Dusart for higher x are better yet. I recommend the paper
238             * by Burde for further information. Dusart's thesis is also a good resource.
239             *
240             * I have tweaked the bounds formulas for small (under 70_000M) numbers so they
241             * are tighter. These bounds are verified via trial. The Dusart bounds
242             * (1.8 and 2.51) are used for larger numbers since those are proven.
243             *
244             */
245              
246             #include "prime_count_tables.h"
247              
248 95477           UV segment_prime_count(UV low, UV high)
249             {
250             const unsigned char* cache_sieve;
251             unsigned char* segment;
252             UV segment_size, low_d, high_d;
253 95477           UV count = 0;
254              
255 95477 100         if ((low <= 2) && (high >= 2)) count++;
    100          
256 95477 100         if ((low <= 3) && (high >= 3)) count++;
    100          
257 95477 100         if ((low <= 5) && (high >= 5)) count++;
    100          
258 95477 100         if (low < 7) low = 7;
259              
260 95477 100         if (low > high) return count;
261              
262             #if !defined(BENCH_SEGCOUNT)
263 80207 100         if (low == 7 && high <= 30*NPRIME_SIEVE30) {
    100          
264 76353           count += count_segment_ranged(prime_sieve30, NPRIME_SIEVE30, low, high);
265 76353           return count;
266             }
267              
268             /* If we have sparse prime count tables, use them here. These will adjust
269             * 'low' and 'count' appropriately for a value slightly less than ours.
270             * This should leave just a small amount of sieving left. They stop at
271             * some point, e.g. 3000M, so we'll get the answer to that point then have
272             * to sieve all the rest. We should be using LMO or Lehmer much earlier. */
273             #ifdef APPLY_TABLES
274 409860 100         APPLY_TABLES
    50          
    100          
    100          
    50          
    100          
    100          
    50          
    100          
    100          
    50          
    100          
    100          
    50          
    50          
    100          
    50          
    0          
    0          
    0          
    0          
    0          
275             #endif
276             #endif
277              
278 3854           low_d = low/30;
279 3854           high_d = high/30;
280              
281             /* Count full bytes only -- no fragments from primary cache */
282 3854           segment_size = get_prime_cache(0, &cache_sieve) / 30;
283 3854 100         if (segment_size < high_d) {
284             /* Expand sieve to sqrt(n) */
285 285 50         UV endp = (high_d >= (UV_MAX/30)) ? UV_MAX-2 : 30*high_d+29;
286 285           UV newsize = (UV)isqrt(endp)+1;
287 285 100         if (newsize > 2642245) newsize = 2642245; /* Limit to icbrt(2^64) */
288             release_prime_cache(cache_sieve);
289 285           segment_size = get_prime_cache( newsize, &cache_sieve) / 30;
290             }
291              
292 3854 50         if ( (segment_size > 0) && (low_d <= segment_size) ) {
    100          
293             /* Count all the primes in the primary cache in our range */
294 3569           count += count_segment_ranged(cache_sieve, segment_size, low, high);
295              
296 3569 50         if (high_d < segment_size) {
297             release_prime_cache(cache_sieve);
298 3569           return count;
299             }
300              
301 0           low_d = segment_size;
302 0 0         if (30*low_d > low) low = 30*low_d;
303             }
304             release_prime_cache(cache_sieve);
305              
306             /* More primes needed. Repeatedly segment sieve. */
307             {
308 285           void* ctx = start_segment_primes(low, high, &segment);
309             UV seg_base, seg_low, seg_high;
310 570 100         while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
311 285           segment_size = seg_high/30 - seg_low/30 + 1;
312 285           count += count_segment_ranged(segment, segment_size, seg_low-seg_base, seg_high-seg_base);
313             }
314 285           end_segment_primes(ctx);
315             }
316              
317 285           return count;
318             }
319              
320 150           UV prime_count_range(UV lo, UV hi)
321             {
322 150 50         if (lo > hi || hi < 2)
    100          
323 8           return 0;
324              
325             #if defined(BENCH_SEGCOUNT)
326             return segment_prime_count(lo, hi);
327             #endif
328              
329             /* We use table acceleration so this is preferable for small inputs */
330 142 100         if (hi < _MPU_LMO_CROSSOVER) return segment_prime_count(lo, hi);
331              
332             { /* Rough empirical threshold for when segment faster than LMO */
333 17           UV range_threshold = hi / (isqrt(hi)/200);
334 17 100         if ( (hi-lo+1) < range_threshold )
335 12           return segment_prime_count(lo, hi);
336             }
337 5 50         return LMO_prime_count(hi) - ((lo < 2) ? 0 : LMO_prime_count(lo-1));
338             }
339              
340 69634           UV prime_count(UV n)
341             {
342 69634 50         if (n < 2) return 0;
343              
344             /* We use table acceleration so this is preferable for small inputs */
345 69634 100         if (n < _MPU_LMO_CROSSOVER) return segment_prime_count(0, n);
346              
347 782           return LMO_prime_count(n);
348             }
349              
350 9623           UV prime_count_approx(UV n)
351             {
352 9623 100         if (n < 3000000) return segment_prime_count(2, n);
353 214           return (UV) (RiemannR((long double) n, 1e-6) + 0.5);
354             }
355              
356              
357 7016           UV prime_count_lower(UV n)
358             {
359             long double fn, fl1, fl2, lower, a;
360              
361 7016 100         if (n < 33000) return segment_prime_count(2, n);
362              
363 413           fn = (long double) n;
364 413           fl1 = logl(n);
365 413           fl2 = fl1 * fl1;
366              
367             /* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.6
368             * show variations of this. */
369              
370 413 100         if (n <= 300070) { /* Quite accurate and avoids calling Li for speed. */
371             /* Based on Axler 2022, page 9, Corollary 5.1 */
372 263 100         a = (n < 69720) ? 905 :
    50          
    100          
    100          
    50          
373             (n < 70120) ? 961 :
374             (n < 88800) ? 918.2 :
375             (n < 176000) ? 887.7 :
376             (n < 299270) ? 839.46 :
377             846.66; /* Good to 300071 */
378 263           lower = fn / (fl1 - 1 - 1/fl1 - 2.975666/fl2 - 13.024334/(fl1*fl2) + a/(fl2*fl2));
379 150 100         } else if (n < UVCONST(4000000000)) {
380             /* Loose enough that FP differences in Li(n) should be ok. */
381 130           a = (n < 88783) ? 4.0L
382 260 50         : (n < 300000) ? -3.0L
383 260 50         : (n < 303000) ? 5.0L
384 260 50         : (n < 1100000) ? -7.0L
385 248 100         : (n < 4500000) ? -37.0L
386 225 100         : (n < 10200000) ? -70.0L
387 199 100         : (n < 36900000) ? -53.0L
388 159 100         : (n < 38100000) ? -29.0L
389 67 50         : -84.0L;
390 130           lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 2.50L/fl1 + a/fl2);
391 20 100         } else if (fn < 1e19) { /* Büthe 2015 1.9 1511.02032v1.pdf */
392 18           lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 3.88L/fl1 + 27.57L/fl2);
393             } else { /* Büthe 2014 v3 7.2 1410.7015v3.pdf */
394 2           lower = Li(fn) - fl1*sqrtl(fn)/25.132741228718345907701147L;
395             }
396 413           return (UV) ceill(lower);
397             }
398              
399             typedef struct {
400             UV thresh;
401             float aval;
402             } thresh_t;
403              
404             static const thresh_t _upper_thresh[] = {
405             { 59000, 2.48f },
406             { 355991, 2.54f },
407             { 3550000, 2.51f },
408             { 3560000, 2.49f },
409             { 5000000, 2.48f },
410             { 8000000, 2.47f },
411             { 13000000, 2.46f },
412             { 18000000, 2.45f },
413             { 31000000, 2.44f },
414             { 41000000, 2.43f },
415             { 48000000, 2.42f },
416             { 119000000, 2.41f },
417             { 182000000, 2.40f },
418             { 192000000, 2.395f },
419             { 213000000, 2.390f },
420             { 271000000, 2.385f },
421             { 322000000, 2.380f },
422             { 400000000, 2.375f },
423             { 510000000, 2.370f },
424             { 682000000, 2.367f },
425             { UVCONST(2953652287), 2.362f }
426             };
427             #define NUPPER_THRESH (sizeof(_upper_thresh)/sizeof(_upper_thresh[0]))
428              
429 7455           UV prime_count_upper(UV n)
430             {
431             int i;
432             long double fn, fl1, fl2, upper, a;
433              
434 7455 100         if (n < 33000) return segment_prime_count(2, n);
435              
436 815           fn = (long double) n;
437 815           fl1 = logl(n);
438 815           fl2 = fl1 * fl1;
439              
440             /* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.5
441             *
442             * upper = fn/(fl1-1.0L-1.0L/fl1-3.35L/fl2-12.65L/(fl2*fl1)-89.6L/(fl2*fl2));
443             * return (UV) floorl(upper);
444             *
445             * Axler 2022: https://arxiv.org/pdf/2203.05917.pdf (v4 2022) improves this.
446             */
447              
448 815 100         if (BITS_PER_WORD == 32 || fn <= 821800000.0) { /* Dusart 2010, page 2 */
449 3493 50         for (i = 0; i < (int)NUPPER_THRESH; i++)
450 3493 100         if (n < _upper_thresh[i].thresh)
451 770           break;
452 770 50         a = (i < (int)NUPPER_THRESH) ? _upper_thresh[i].aval : 2.334L;
453 770           upper = fn/fl1 * (1.0L + 1.0L/fl1 + a/fl2);
454 45 100         } else if (fn < 1e19) { /* Büthe 2015 1.10 Skewes number lower limit */
455 43           a = (fn < 1100000000.0) ? 0.032 /* Empirical */
456 43 100         : (fn < 10010000000.0) ? 0.027 /* Empirical */
    100          
    100          
457             : (fn < 101260000000.0) ? 0.021 /* Empirical */
458             : 0.0;
459 43           upper = Li(fn) - a * fl1*sqrtl(fn)/25.132741228718345907701147L;
460             } else { /* Büthe 2014 7.4 */
461 2           upper = Li(fn) + fl1*sqrtl(fn)/25.132741228718345907701147L;
462             }
463 815           return (UV) floorl(upper);
464             }
465              
466 623           static void simple_nth_limits(UV *lo, UV *hi, long double n, long double logn, long double loglogn) {
467 623 100         const long double a = (n < 228) ? .6483 : (n < 948) ? .8032 : (n < 2195) ? .8800 : (n < 39017) ? .9019 : .9484;
    100          
    100          
    100          
468 623           *lo = n * (logn + loglogn - 1.0 + ((loglogn-2.10)/logn));
469 623           *hi = n * (logn + loglogn - a);
470 623 50         if (*hi < *lo) *hi = MPU_MAX_PRIME;
471 623           }
472              
473             /* The nth prime will be less or equal to this number */
474 630           UV nth_prime_upper(UV n)
475             {
476             long double fn, flogn, flog2n, upper, c, d;
477              
478 630 100         if (n < NPRIMES_SMALL)
479 211           return primes_small[n];
480 419 100         if (n >= MPU_MAX_PRIME_IDX)
481 1 50         return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
482              
483 418           fn = (long double) n;
484 418           flogn = logl(n);
485 418           flog2n = logl(flogn); /* Note distinction between log_2(n) and log^2(n) */
486              
487             /* Binary search on prime count lower. Good but quite slow. */
488 418 100         if (n < 15360) {
489             UV lo,hi;
490 306           simple_nth_limits(&lo, &hi, fn, flogn, flog2n);
491 2277 100         while (lo < hi) {
492 1971           UV mid = lo + (hi-lo)/2;
493 1971 100         if (prime_count_lower(mid) < n) lo = mid+1;
494 957           else hi = mid;
495             }
496 306           return lo;
497             }
498              
499             /* See: Axler 2013, Dusart 2010 */
500             /* Axler 2017: http://arxiv.org/pdf/1706.03651.pdf */
501              
502 112 100         if (n >= 46254381) { c = 2.00; d = 10.667; } /* Axler 2017 Cor 1.2 */
503 94 100         else if (n >= 8009824) { c = 2.00; d = 10.273; } /* Axler 2013 Kor G */
504             /* This is about 3x better than Dusart (2010) for 688382-8009823:
505             *
506             * else if (n >= 688382) { c = 2.30; d = 0.5730; }
507             *
508             * but we can split the range and get another 2x improvement in MSE.
509             */
510 76 100         else if (n >= 5450000) { c = 2.00; d = 10.1335; } /*5450-8009 */
511 64 100         else if (n >= 3906280) { c = 1.67; d = 20.2675; } /*3906-5450 */
512 61 100         else if (n >= 2110840) { c = 2.51; d = -5.5714; } /*2110-3906 */
513 56 100         else if (n >= 876700) { c = 2.49; d = -4.5129; } /* 877-2110 */
514 49 100         else if (n >= 688382) { c = 3.31; d = -26.3858; } /* 688-877 */
515             /* Use the Axler framework to get good bounds for smaller inputs. */
516 44 50         else if (n >= 575750) { c =-0.79; d = 83.5215; } /* 580-688 */
517 44 50         else if (n >= 467650) { c = 0.93; d = 37.1597; } /* 467-580 */
518 44 100         else if (n >= 382440) { c = 2.92; d = -15.4768; } /* 382-467 */
519 40 100         else if (n >= 301130) { c = 5.92; d = -91.3415; } /* 301-382 */
520 39 100         else if (n >= 138630) { c = 2.01; d = 7.2842; } /* 138-301 */
521 36 100         else if (n >= 85820) { c = 2.07; d = 5.2103; } /* 86-138 */
522 23 100         else if (n >= 39016) { c = 2.76; d = -11.5918; } /* 39- 86 */
523 6 50         else if (n >= 31490) { c = 1.49; d = 15.1821; } /* 31- 39 */
524 6 100         else if (n >= 25070) { c =11.89; d =-197.8951; } /* 25- 31 */
525 5 50         else if (n >= 15359) { c = 4.80; d = -51.5928; } /* 15- 25 */
526 0           else { c = 3.92; d = -33.3994; } /* 0- 15 */
527              
528 112           upper = fn * ( flogn + flog2n - 1.0 + ((flog2n-c)/flogn)
529 112           - (flog2n*flog2n-6*flog2n+d)/(2*flogn*flogn) );
530              
531 112 50         if (upper >= (long double)UV_MAX) {
532 0 0         if (n <= MPU_MAX_PRIME_IDX) return MPU_MAX_PRIME;
533 0           croak("nth_prime_upper(%"UVuf") overflow", n);
534             }
535              
536 112           return (UV) floorl(upper);
537             }
538              
539             /* The nth prime will be greater than or equal to this number */
540 508           UV nth_prime_lower(UV n)
541             {
542             double fn, flogn, flog2n;
543             UV plower;
544              
545 508 100         if (n < NPRIMES_SMALL)
546 139           return primes_small[n];
547 369 100         if (n >= MPU_MAX_PRIME_IDX)
548 2 50         return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
549              
550 367           fn = (double) n;
551 367           flogn = log(n);
552 367           flog2n = log(flogn);
553              
554             /* For small values, do a binary search on the inverse prime count */
555 367 100         if (n < 2000000) {
556             UV lo,hi;
557 317           simple_nth_limits(&lo, &hi, fn, flogn, flog2n);
558 2531 100         while (lo < hi) {
559 2214           UV mid = lo + (hi-lo)/2;
560 2214 100         if (prime_count_upper(mid) < n) lo = mid+1;
561 1174           else hi = mid;
562             }
563 317           return lo;
564             }
565              
566             { /* Axler 2017 http://arxiv.org/pdf/1706.03651.pdf Corollary 1.4 */
567 50 100         double b1 = (n < 56000000) ? 11.200 : 11.50800000002;
568 50           double lower = fn * (flogn + flog2n-1.0 + ((flog2n-2.00)/flogn) - ((flog2n*flog2n-6*flog2n+b1)/(2*flogn*flogn)));
569 50           plower = (UV) ceill(lower);
570             }
571 50           return plower < MPU_MAX_PRIME ? plower : MPU_MAX_PRIME;
572             }
573              
574 26           UV nth_prime_approx(UV n)
575             {
576 26 100         return (n < NPRIMES_SMALL) ? primes_small[n] : inverse_R(n);
577             }
578              
579              
580 222           UV nth_prime(UV n)
581             {
582             const unsigned char* cache_sieve;
583             unsigned char* segment;
584             UV upper_limit, segbase, segment_size, p, count, target;
585              
586             /* If very small, return the table entry */
587 222 100         if (n < NPRIMES_SMALL)
588 174           return primes_small[n];
589 48 50         if (n >= MPU_MAX_PRIME_IDX)
590 0 0         return n == MPU_MAX_PRIME_IDX ? MPU_MAX_PRIME : 0;
591              
592             /* Determine a bound on the nth prime. We know it comes before this. */
593 48           upper_limit = nth_prime_upper(n);
594 48 50         MPUassert(upper_limit > 0, "nth_prime got an upper limit of 0");
595 48           p = count = 0;
596 48           target = n-3;
597              
598             /* For relatively small values, generate a sieve and count the results.
599             *
600             * For larger values, compute an approximate low estimate, use our fast
601             * prime count, then segment sieve forwards or backwards for the rest.
602             */
603 48 100         if (upper_limit <= get_prime_cache(0, 0) || upper_limit <= 32*1024*30) {
    100          
604             /* Generate a sieve and count. */
605 31           segment_size = get_prime_cache(upper_limit, &cache_sieve) / 30;
606             /* Count up everything in the cached sieve. */
607 31 50         if (segment_size > 0)
608 31           count += count_segment_maxcount(cache_sieve, 0, segment_size, target, &p);
609             release_prime_cache(cache_sieve);
610             } else {
611             /* A binary search on RiemannR is nice, but ends up either often being
612             * being higher (requiring going backwards) or biased and then far too
613             * low. Using the inverse Li is easier and more consistent. */
614 17           UV lower_limit = inverse_li(n);
615             /* For even better performance, add in half the usual correction, which
616             * will get us even closer, so even less sieving required. However, it
617             * is now possible to get a result higher than the value, so we'll need
618             * to handle that case. It still ends up being a better deal than R,
619             * given that we don't have a fast backward sieve. */
620 17           lower_limit += inverse_li(isqrt(n))/4;
621 17           segment_size = lower_limit / 30;
622 17           lower_limit = 30 * segment_size - 1;
623 17           count = prime_count(lower_limit);
624              
625             /* printf("We've estimated %lu too %s.\n", (count>n)?count-n:n-count, (count>n)?"FAR":"little"); */
626             /* printf("Our limit %lu %s a prime\n", lower_limit, is_prime(lower_limit) ? "is" : "is not"); */
627              
628 17 100         if (count >= n) { /* Too far. Walk backwards */
629 1 50         if (is_prime(lower_limit)) count--;
630 2 100         for (p = 0; p <= (count-n); p++)
631 1           lower_limit = prev_prime(lower_limit);
632 1           return lower_limit;
633             }
634 16           count -= 3;
635              
636             /* Make sure the segment siever won't have to keep resieving. */
637 16           prime_precalc(isqrt(upper_limit));
638             }
639              
640 47 100         if (count == target)
641 31           return p;
642              
643             /* Start segment sieving. Get memory to sieve into. */
644 16           segbase = segment_size;
645 16           segment = get_prime_segment(&segment_size);
646              
647 32 100         while (count < target) {
648             /* Limit the segment size if we know the answer comes earlier */
649 16 100         if ( (30*(segbase+segment_size)+29) > upper_limit )
650 15           segment_size = (upper_limit - segbase*30 + 30) / 30;
651              
652             /* Do the actual sieving in the range */
653 16           sieve_segment(segment, segbase, segbase + segment_size-1);
654              
655             /* Count up everything in this segment */
656 16           count += count_segment_maxcount(segment, 30*segbase, segment_size, target-count, &p);
657              
658 16 50         if (count < target)
659 0           segbase += segment_size;
660             }
661 16           release_prime_segment(segment);
662 16 50         MPUassert(count == target, "nth_prime got incorrect count");
663 16           return ( (segbase*30) + p );
664             }
665              
666              
667              
668             /******************************************************************************/
669             /* MISC */
670             /******************************************************************************/
671              
672              
673 788           double ramanujan_axler(long double n, long double c, long double d) {
674 788           long double res, U, c1, c2, log2 = logl(2), logn = logl(n), loglogn = logl(logn);
675              
676 788           c1 = 2*log2*log2 + log2 + c;
677 788           c2 = log2*log2*log2 + 2*log2*log2 + d;
678              
679 788           U = (log2 * logn*loglogn*loglogn - c1*logn*loglogn + c2*logn - log2*log2*loglogn + log2*log2*log2 + log2*log2)
680 788           / (logn*logn*logn*logn + logn*logn*logn*loglogn - logn*logn*logn*log2 - logn*logn*log2);
681              
682 788           res = 2*n * (1.0L + log2/logn - (log2*loglogn - log2*log2 - log2) / (logn*logn) + U);
683 788           return res;
684             }