line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include |
2
|
|
|
|
|
|
|
#include |
3
|
|
|
|
|
|
|
#include |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#define FUNC_popcnt 1 |
6
|
|
|
|
|
|
|
#define FUNC_isqrt 1 |
7
|
|
|
|
|
|
|
#include "ptypes.h" |
8
|
|
|
|
|
|
|
#include "sieve.h" |
9
|
|
|
|
|
|
|
#include "cache.h" |
10
|
|
|
|
|
|
|
#include "lmo.h" |
11
|
|
|
|
|
|
|
#include "constants.h" |
12
|
|
|
|
|
|
|
#include "prime_nth_count.h" |
13
|
|
|
|
|
|
|
#include "util.h" |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
#include |
16
|
|
|
|
|
|
|
#if _MSC_VER || defined(__IBMC__) || defined(__IBMCPP__) || (defined(__STDC_VERSION__) && __STDC_VERSION >= 199901L) |
17
|
|
|
|
|
|
|
/* math.h should give us these as functions or macros. |
18
|
|
|
|
|
|
|
* |
19
|
|
|
|
|
|
|
* extern long double floorl(long double); |
20
|
|
|
|
|
|
|
* extern long double ceill(long double); |
21
|
|
|
|
|
|
|
* extern long double sqrtl(long double); |
22
|
|
|
|
|
|
|
* extern long double logl(long double); |
23
|
|
|
|
|
|
|
*/ |
24
|
|
|
|
|
|
|
#else |
25
|
|
|
|
|
|
|
#define floorl(x) (long double) floor( (double) (x) ) |
26
|
|
|
|
|
|
|
#define ceill(x) (long double) ceil( (double) (x) ) |
27
|
|
|
|
|
|
|
#define sqrtl(x) (long double) sqrt( (double) (x) ) |
28
|
|
|
|
|
|
|
#define logl(x) (long double) log( (double) (x) ) |
29
|
|
|
|
|
|
|
#endif |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
#if defined(__GNUC__) |
32
|
|
|
|
|
|
|
#define word_unaligned(m,wordsize) ((uintptr_t)m & (wordsize-1)) |
33
|
|
|
|
|
|
|
#else /* uintptr_t is part of C99 */ |
34
|
|
|
|
|
|
|
#define word_unaligned(m,wordsize) ((unsigned int)m & (wordsize-1)) |
35
|
|
|
|
|
|
|
#endif |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* TODO: This data is duplicated in util.c. */ |
38
|
|
|
|
|
|
|
static const unsigned char prime_sieve30[] = |
39
|
|
|
|
|
|
|
{0x01,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0xb0,0xe1,0x0c,0x15,0x59,0x12, |
40
|
|
|
|
|
|
|
0x61,0x19,0xf3,0x2c,0x2c,0xc4,0x22,0xa6,0x5a,0x95,0x98,0x6d,0x42,0x87,0xe1, |
41
|
|
|
|
|
|
|
0x59,0xa9,0xa9,0x1c,0x52,0xd2,0x21,0xd5,0xb3,0xaa,0x26,0x5c,0x0f,0x60,0xfc, |
42
|
|
|
|
|
|
|
0xab,0x5e,0x07,0xd1,0x02,0xbb,0x16,0x99,0x09,0xec,0xc5,0x47,0xb3,0xd4,0xc5, |
43
|
|
|
|
|
|
|
0xba,0xee,0x40,0xab,0x73,0x3e,0x85,0x4c,0x37,0x43,0x73,0xb0,0xde,0xa7,0x8e, |
44
|
|
|
|
|
|
|
0x8e,0x64,0x3e,0xe8,0x10,0xab,0x69,0xe5,0xf7,0x1a,0x7c,0x73,0xb9,0x8d,0x04, |
45
|
|
|
|
|
|
|
0x51,0x9a,0x6d,0x70,0xa7,0x78,0x2d,0x6d,0x27,0x7e,0x9a,0xd9,0x1c,0x5f,0xee, |
46
|
|
|
|
|
|
|
0xc7,0x38,0xd9,0xc3,0x7e,0x14,0x66,0x72,0xae,0x77,0xc1,0xdb,0x0c,0xcc,0xb2, |
47
|
|
|
|
|
|
|
0xa5,0x74,0xe3,0x58,0xd5,0x4b,0xa7,0xb3,0xb1,0xd9,0x09,0xe6,0x7d,0x23,0x7c, |
48
|
|
|
|
|
|
|
0x3c,0xd3,0x0e,0xc7,0xfd,0x4a,0x32,0x32,0xfd,0x4d,0xb5,0x6b,0xf3,0xa8,0xb3, |
49
|
|
|
|
|
|
|
0x85,0xcf,0xbc,0xf4,0x0e,0x34,0xbb,0x93,0xdb,0x07,0xe6,0xfe,0x6a,0x57,0xa3, |
50
|
|
|
|
|
|
|
0x8c,0x15,0x72,0xdb,0x69,0xd4,0xaf,0x59,0xdd,0xe1,0x3b,0x2e,0xb7,0xf9,0x2b, |
51
|
|
|
|
|
|
|
0xc5,0xd0,0x8b,0x63,0xf8,0x95,0xfa,0x77,0x40,0x97,0xea,0xd1,0x9f,0xaa,0x1c, |
52
|
|
|
|
|
|
|
0x48,0xae,0x67,0xf7,0xeb,0x79,0xa5,0x55,0xba,0xb2,0xb6,0x8f,0xd8,0x2d,0x6c, |
53
|
|
|
|
|
|
|
0x2a,0x35,0x54,0xfd,0x7c,0x9e,0xfa,0xdb,0x31,0x78,0xdd,0x3d,0x56,0x52,0xe7, |
54
|
|
|
|
|
|
|
0x73,0xb2,0x87,0x2e,0x76,0xe9,0x4f,0xa8,0x38,0x9d,0x5d,0x3f,0xcb,0xdb,0xad, |
55
|
|
|
|
|
|
|
0x51,0xa5,0xbf,0xcd,0x72,0xde,0xf7,0xbc,0xcb,0x49,0x2d,0x49,0x26,0xe6,0x1e, |
56
|
|
|
|
|
|
|
0x9f,0x98,0xe5,0xc6,0x9f,0x2f,0xbb,0x85,0x6b,0x65,0xf6,0x77,0x7c,0x57,0x8b, |
57
|
|
|
|
|
|
|
0xaa,0xef,0xd8,0x5e,0xa2,0x97,0xe1,0xdc,0x37,0xcd,0x1f,0xe6,0xfc,0xbb,0x8c, |
58
|
|
|
|
|
|
|
0xb7,0x4e,0xc7,0x3c,0x19,0xd5,0xa8,0x9e,0x67,0x4a,0xe3,0xf5,0x97,0x3a,0x7e, |
59
|
|
|
|
|
|
|
0x70,0x53,0xfd,0xd6,0xe5,0xb8,0x1c,0x6b,0xee,0xb1,0x9b,0xd1,0xeb,0x34,0xc2, |
60
|
|
|
|
|
|
|
0x23,0xeb,0x3a,0xf9,0xef,0x16,0xd6,0x4e,0x7d,0x16,0xcf,0xb8,0x1c,0xcb,0xe6, |
61
|
|
|
|
|
|
|
0x3c,0xda,0xf5,0xcf}; |
62
|
|
|
|
|
|
|
#define NPRIME_SIEVE30 (sizeof(prime_sieve30)/sizeof(prime_sieve30[0])) |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
static const unsigned short primes_small[] = |
65
|
|
|
|
|
|
|
{0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97, |
66
|
|
|
|
|
|
|
101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191, |
67
|
|
|
|
|
|
|
193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283, |
68
|
|
|
|
|
|
|
293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401, |
69
|
|
|
|
|
|
|
409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499}; |
70
|
|
|
|
|
|
|
#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0])) |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
static const unsigned char byte_zeros[256] = |
74
|
|
|
|
|
|
|
{8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3, |
75
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
76
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
77
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
78
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
79
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
80
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
81
|
|
|
|
|
|
|
5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0}; |
82
|
72404
|
|
|
|
|
|
static UV count_zero_bits(const unsigned char* m, UV nbytes) |
83
|
|
|
|
|
|
|
{ |
84
|
72404
|
|
|
|
|
|
UV count = 0; |
85
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
86
|
72404
|
100
|
|
|
|
|
if (nbytes >= 16) { |
87
|
259643
|
100
|
|
|
|
|
while ( word_unaligned(m,sizeof(UV)) && nbytes--) |
|
|
50
|
|
|
|
|
|
88
|
224578
|
|
|
|
|
|
count += byte_zeros[*m++]; |
89
|
35065
|
50
|
|
|
|
|
if (nbytes >= 8) { |
90
|
35065
|
|
|
|
|
|
UV* wordptr = (UV*)m; |
91
|
35065
|
|
|
|
|
|
UV nwords = nbytes / 8; |
92
|
35065
|
|
|
|
|
|
UV nzeros = nwords * 64; |
93
|
35065
|
|
|
|
|
|
m += nwords * 8; |
94
|
35065
|
|
|
|
|
|
nbytes %= 8; |
95
|
411509
|
100
|
|
|
|
|
while (nwords--) |
96
|
376444
|
|
|
|
|
|
nzeros -= popcnt(*wordptr++); |
97
|
35065
|
|
|
|
|
|
count += nzeros; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
#endif |
101
|
381815
|
100
|
|
|
|
|
while (nbytes--) |
102
|
309411
|
|
|
|
|
|
count += byte_zeros[*m++]; |
103
|
72404
|
|
|
|
|
|
return count; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
/* Given a sieve of size nbytes, walk it counting zeros (primes) until: |
107
|
|
|
|
|
|
|
* |
108
|
|
|
|
|
|
|
* (1) we counted them all: return the count, which will be less than maxcount. |
109
|
|
|
|
|
|
|
* |
110
|
|
|
|
|
|
|
* (2) we hit maxcount: set position to the index of the maxcount'th prime |
111
|
|
|
|
|
|
|
* and return count (which will be equal to maxcount). |
112
|
|
|
|
|
|
|
*/ |
113
|
1085
|
|
|
|
|
|
static UV count_segment_maxcount(const unsigned char* sieve, UV base, UV nbytes, UV maxcount, UV* pos) |
114
|
|
|
|
|
|
|
{ |
115
|
1085
|
|
|
|
|
|
UV count = 0; |
116
|
1085
|
|
|
|
|
|
UV byte = 0; |
117
|
1085
|
|
|
|
|
|
const unsigned char* sieveptr = sieve; |
118
|
1085
|
|
|
|
|
|
const unsigned char* maxsieve = sieve + nbytes; |
119
|
|
|
|
|
|
|
|
120
|
1085
|
50
|
|
|
|
|
MPUassert(sieve != 0, "count_segment_maxcount incorrect args"); |
121
|
1085
|
50
|
|
|
|
|
MPUassert(pos != 0, "count_segment_maxcount incorrect args"); |
122
|
1085
|
|
|
|
|
|
*pos = 0; |
123
|
1085
|
50
|
|
|
|
|
if ( (nbytes == 0) || (maxcount == 0) ) |
|
|
50
|
|
|
|
|
|
124
|
0
|
|
|
|
|
|
return 0; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
/* Do fixed-length word counts to start, with possible overcounting */ |
127
|
4501
|
100
|
|
|
|
|
while ((count+64) < maxcount && sieveptr < maxsieve) { |
|
|
50
|
|
|
|
|
|
128
|
3416
|
|
|
|
|
|
UV top = base + 3*maxcount; |
129
|
3416
|
100
|
|
|
|
|
UV div = (top < 8000) ? 8 : /* 8 cannot overcount */ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
130
|
|
|
|
|
|
|
(top < 1000000) ? 4 : |
131
|
|
|
|
|
|
|
(top < 10000000) ? 3 : 2; |
132
|
3416
|
|
|
|
|
|
UV minbytes = (maxcount-count)/div; |
133
|
3416
|
50
|
|
|
|
|
if (minbytes > (UV)(maxsieve-sieveptr)) minbytes = maxsieve-sieveptr; |
134
|
3416
|
|
|
|
|
|
count += count_zero_bits(sieveptr, minbytes); |
135
|
3416
|
|
|
|
|
|
sieveptr += minbytes; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
/* Count until we reach the end or >= maxcount */ |
138
|
15194
|
50
|
|
|
|
|
while ( (sieveptr < maxsieve) && (count < maxcount) ) |
|
|
100
|
|
|
|
|
|
139
|
14109
|
|
|
|
|
|
count += byte_zeros[*sieveptr++]; |
140
|
|
|
|
|
|
|
/* If we went too far, back up. */ |
141
|
2170
|
100
|
|
|
|
|
while (count >= maxcount) |
142
|
1085
|
|
|
|
|
|
count -= byte_zeros[*--sieveptr]; |
143
|
|
|
|
|
|
|
/* We counted this many bytes */ |
144
|
1085
|
|
|
|
|
|
byte = sieveptr - sieve; |
145
|
|
|
|
|
|
|
|
146
|
1085
|
50
|
|
|
|
|
MPUassert(count < maxcount, "count_segment_maxcount wrong count"); |
147
|
|
|
|
|
|
|
|
148
|
1085
|
50
|
|
|
|
|
if (byte == nbytes) |
149
|
0
|
|
|
|
|
|
return count; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
/* The result is somewhere in the next byte */ |
152
|
16689
|
50
|
|
|
|
|
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, byte*30+1, nbytes*30-1) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
153
|
2772
|
100
|
|
|
|
|
if (++count == maxcount) { *pos = p; return count; } |
154
|
0
|
|
|
|
|
|
END_DO_FOR_EACH_SIEVE_PRIME; |
155
|
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
|
MPUassert(0, "count_segment_maxcount failure"); |
157
|
|
|
|
|
|
|
return 0; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
/* Given a sieve of size nbytes, counting zeros (primes) but excluding the |
161
|
|
|
|
|
|
|
* areas outside lowp and highp. |
162
|
|
|
|
|
|
|
*/ |
163
|
71444
|
|
|
|
|
|
static UV count_segment_ranged(const unsigned char* sieve, UV nbytes, UV lowp, UV highp) |
164
|
|
|
|
|
|
|
{ |
165
|
|
|
|
|
|
|
UV count, hi_d, lo_d, lo_m; |
166
|
|
|
|
|
|
|
|
167
|
71444
|
50
|
|
|
|
|
MPUassert( sieve != 0, "count_segment_ranged incorrect args"); |
168
|
71444
|
50
|
|
|
|
|
if (nbytes == 0) return 0; |
169
|
|
|
|
|
|
|
|
170
|
71444
|
|
|
|
|
|
count = 0; |
171
|
71444
|
|
|
|
|
|
hi_d = highp/30; |
172
|
|
|
|
|
|
|
|
173
|
71444
|
50
|
|
|
|
|
if (hi_d >= nbytes) { |
174
|
0
|
|
|
|
|
|
hi_d = nbytes-1; |
175
|
0
|
|
|
|
|
|
highp = hi_d*30+29; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
71444
|
50
|
|
|
|
|
if (highp < lowp) |
179
|
0
|
|
|
|
|
|
return 0; |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
#if 0 |
182
|
|
|
|
|
|
|
/* Dead simple way */ |
183
|
|
|
|
|
|
|
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp) |
184
|
|
|
|
|
|
|
count++; |
185
|
|
|
|
|
|
|
END_DO_FOR_EACH_SIEVE_PRIME; |
186
|
|
|
|
|
|
|
return count; |
187
|
|
|
|
|
|
|
#endif |
188
|
|
|
|
|
|
|
|
189
|
71444
|
|
|
|
|
|
lo_d = lowp/30; |
190
|
71444
|
|
|
|
|
|
lo_m = lowp - lo_d*30; |
191
|
|
|
|
|
|
|
/* Count first fragment */ |
192
|
71444
|
100
|
|
|
|
|
if (lo_m > 1) { |
193
|
69624
|
|
|
|
|
|
UV upper = (highp <= (lo_d*30+29)) ? highp : (lo_d*30+29); |
194
|
626009
|
50
|
|
|
|
|
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, upper) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
195
|
486728
|
|
|
|
|
|
count++; |
196
|
69624
|
|
|
|
|
|
END_DO_FOR_EACH_SIEVE_PRIME; |
197
|
69624
|
|
|
|
|
|
lowp = upper+2; |
198
|
69624
|
|
|
|
|
|
lo_d = lowp/30; |
199
|
|
|
|
|
|
|
} |
200
|
71444
|
100
|
|
|
|
|
if (highp < lowp) |
201
|
352
|
|
|
|
|
|
return count; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
/* Count bytes in the middle */ |
204
|
|
|
|
|
|
|
{ |
205
|
71092
|
|
|
|
|
|
UV hi_m = highp - hi_d*30; |
206
|
71092
|
|
|
|
|
|
UV count_bytes = hi_d - lo_d + (hi_m == 29); |
207
|
71092
|
100
|
|
|
|
|
if (count_bytes > 0) { |
208
|
68988
|
|
|
|
|
|
count += count_zero_bits(sieve+lo_d, count_bytes); |
209
|
68988
|
|
|
|
|
|
lowp += 30*count_bytes; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
} |
212
|
71092
|
100
|
|
|
|
|
if (highp < lowp) |
213
|
4923
|
|
|
|
|
|
return count; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
/* Count last fragment */ |
216
|
1487031
|
50
|
|
|
|
|
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, lowp, highp) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
217
|
164714
|
|
|
|
|
|
count++; |
218
|
66169
|
|
|
|
|
|
END_DO_FOR_EACH_SIEVE_PRIME; |
219
|
|
|
|
|
|
|
|
220
|
66169
|
|
|
|
|
|
return count; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
/* |
225
|
|
|
|
|
|
|
* The pi(x) prime count functions. prime_count(x) gives an exact number, |
226
|
|
|
|
|
|
|
* but requires determining all the primes up to x, so will be much slower. |
227
|
|
|
|
|
|
|
* |
228
|
|
|
|
|
|
|
* prime_count_lower(x) and prime_count_upper(x) give lower and upper limits, |
229
|
|
|
|
|
|
|
* which will bound the exact value. These bounds should be fairly tight. |
230
|
|
|
|
|
|
|
* |
231
|
|
|
|
|
|
|
* pi_upper(x) - pi(x) pi_lower(x) - pi(x) |
232
|
|
|
|
|
|
|
* < 10 for x < 5_371 < 10 for x < 9_437 |
233
|
|
|
|
|
|
|
* < 50 for x < 295_816 < 50 for x < 136_993 |
234
|
|
|
|
|
|
|
* < 100 for x < 1_761_655 < 100 for x < 909_911 |
235
|
|
|
|
|
|
|
* < 200 for x < 9_987_821 < 200 for x < 8_787_901 |
236
|
|
|
|
|
|
|
* < 400 for x < 34_762_891 < 400 for x < 30_332_723 |
237
|
|
|
|
|
|
|
* < 1000 for x < 372_748_528 < 1000 for x < 233_000_533 |
238
|
|
|
|
|
|
|
* < 5000 for x < 1_882_595_905 < 5000 for x < over 4300M |
239
|
|
|
|
|
|
|
* |
240
|
|
|
|
|
|
|
* The average of the upper and lower bounds is within 9 for all x < 15809, and |
241
|
|
|
|
|
|
|
* within 50 for all x < 1_763_367. |
242
|
|
|
|
|
|
|
* |
243
|
|
|
|
|
|
|
* It is common to use the following Chebyshev inequality for x >= 17: |
244
|
|
|
|
|
|
|
* 1*x/logx <-> 1.25506*x/logx |
245
|
|
|
|
|
|
|
* but this gives terribly loose bounds. |
246
|
|
|
|
|
|
|
* |
247
|
|
|
|
|
|
|
* Rosser and Schoenfeld's bound for x >= 67 of |
248
|
|
|
|
|
|
|
* x/(logx-1/2) <-> x/(logx-3/2) |
249
|
|
|
|
|
|
|
* is much tighter. These bounds can be tightened even more. |
250
|
|
|
|
|
|
|
* |
251
|
|
|
|
|
|
|
* The formulas of Dusart for higher x are better yet. I recommend the paper |
252
|
|
|
|
|
|
|
* by Burde for further information. Dusart's thesis is also a good resource. |
253
|
|
|
|
|
|
|
* |
254
|
|
|
|
|
|
|
* I have tweaked the bounds formulas for small (under 70_000M) numbers so they |
255
|
|
|
|
|
|
|
* are tighter. These bounds are verified via trial. The Dusart bounds |
256
|
|
|
|
|
|
|
* (1.8 and 2.51) are used for larger numbers since those are proven. |
257
|
|
|
|
|
|
|
* |
258
|
|
|
|
|
|
|
*/ |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
#include "prime_count_tables.h" |
261
|
|
|
|
|
|
|
|
262
|
71488
|
|
|
|
|
|
UV segment_prime_count(UV low, UV high) |
263
|
|
|
|
|
|
|
{ |
264
|
|
|
|
|
|
|
const unsigned char* cache_sieve; |
265
|
|
|
|
|
|
|
unsigned char* segment; |
266
|
|
|
|
|
|
|
UV segment_size, low_d, high_d; |
267
|
71488
|
|
|
|
|
|
UV count = 0; |
268
|
|
|
|
|
|
|
|
269
|
71488
|
100
|
|
|
|
|
if ((low <= 2) && (high >= 2)) count++; |
|
|
100
|
|
|
|
|
|
270
|
71488
|
100
|
|
|
|
|
if ((low <= 3) && (high >= 3)) count++; |
|
|
100
|
|
|
|
|
|
271
|
71488
|
100
|
|
|
|
|
if ((low <= 5) && (high >= 5)) count++; |
|
|
100
|
|
|
|
|
|
272
|
71488
|
100
|
|
|
|
|
if (low < 7) low = 7; |
273
|
|
|
|
|
|
|
|
274
|
71488
|
100
|
|
|
|
|
if (low > high) return count; |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
#if !defined(BENCH_SEGCOUNT) |
277
|
71435
|
100
|
|
|
|
|
if (low == 7 && high <= 30*NPRIME_SIEVE30) { |
|
|
100
|
|
|
|
|
|
278
|
69611
|
|
|
|
|
|
count += count_segment_ranged(prime_sieve30, NPRIME_SIEVE30, low, high); |
279
|
69611
|
|
|
|
|
|
return count; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
/* If we have sparse prime count tables, use them here. These will adjust |
283
|
|
|
|
|
|
|
* 'low' and 'count' appropriately for a value slightly less than ours. |
284
|
|
|
|
|
|
|
* This should leave just a small amount of sieving left. They stop at |
285
|
|
|
|
|
|
|
* some point, e.g. 3000M, so we'll get the answer to that point then have |
286
|
|
|
|
|
|
|
* to sieve all the rest. We should be using LMO or Lehmer much earlier. */ |
287
|
|
|
|
|
|
|
#ifdef APPLY_TABLES |
288
|
329655
|
100
|
|
|
|
|
APPLY_TABLES |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
289
|
|
|
|
|
|
|
#endif |
290
|
|
|
|
|
|
|
#endif |
291
|
|
|
|
|
|
|
|
292
|
1824
|
|
|
|
|
|
low_d = low/30; |
293
|
1824
|
|
|
|
|
|
high_d = high/30; |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
/* Count full bytes only -- no fragments from primary cache */ |
296
|
1824
|
|
|
|
|
|
segment_size = get_prime_cache(0, &cache_sieve) / 30; |
297
|
1824
|
100
|
|
|
|
|
if (segment_size < high_d) { |
298
|
|
|
|
|
|
|
/* Expand sieve to sqrt(n) */ |
299
|
96
|
50
|
|
|
|
|
UV endp = (high_d >= (UV_MAX/30)) ? UV_MAX-2 : 30*high_d+29; |
300
|
|
|
|
|
|
|
release_prime_cache(cache_sieve); |
301
|
96
|
|
|
|
|
|
segment_size = get_prime_cache( isqrt(endp) + 1 , &cache_sieve) / 30; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
1824
|
50
|
|
|
|
|
if ( (segment_size > 0) && (low_d <= segment_size) ) { |
|
|
100
|
|
|
|
|
|
305
|
|
|
|
|
|
|
/* Count all the primes in the primary cache in our range */ |
306
|
1728
|
|
|
|
|
|
count += count_segment_ranged(cache_sieve, segment_size, low, high); |
307
|
|
|
|
|
|
|
|
308
|
1728
|
50
|
|
|
|
|
if (high_d < segment_size) { |
309
|
|
|
|
|
|
|
release_prime_cache(cache_sieve); |
310
|
1728
|
|
|
|
|
|
return count; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
low_d = segment_size; |
314
|
0
|
0
|
|
|
|
|
if (30*low_d > low) low = 30*low_d; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
release_prime_cache(cache_sieve); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
/* More primes needed. Repeatedly segment sieve. */ |
319
|
|
|
|
|
|
|
{ |
320
|
96
|
|
|
|
|
|
void* ctx = start_segment_primes(low, high, &segment); |
321
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
322
|
201
|
100
|
|
|
|
|
while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
323
|
105
|
|
|
|
|
|
segment_size = seg_high/30 - seg_low/30 + 1; |
324
|
105
|
|
|
|
|
|
count += count_segment_ranged(segment, segment_size, seg_low-seg_base, seg_high-seg_base); |
325
|
|
|
|
|
|
|
} |
326
|
96
|
|
|
|
|
|
end_segment_primes(ctx); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
71488
|
|
|
|
|
|
return count; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
685
|
|
|
|
|
|
UV prime_count(UV lo, UV hi) |
333
|
|
|
|
|
|
|
{ |
334
|
685
|
50
|
|
|
|
|
if (lo > hi || hi < 2) |
|
|
100
|
|
|
|
|
|
335
|
1
|
|
|
|
|
|
return 0; |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
#if defined(BENCH_SEGCOUNT) |
338
|
|
|
|
|
|
|
return segment_prime_count(lo, hi); |
339
|
|
|
|
|
|
|
#endif |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
/* We use table acceleration so this is preferable for small inputs */ |
342
|
684
|
100
|
|
|
|
|
if (hi < _MPU_LMO_CROSSOVER) return segment_prime_count(lo, hi); |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
{ /* Rough empirical threshold for when segment faster than LMO */ |
345
|
29
|
|
|
|
|
|
UV range_threshold = hi / (isqrt(hi)/200); |
346
|
29
|
100
|
|
|
|
|
if ( (hi-lo+1) < range_threshold ) |
347
|
10
|
|
|
|
|
|
return segment_prime_count(lo, hi); |
348
|
|
|
|
|
|
|
} |
349
|
19
|
50
|
|
|
|
|
return LMO_prime_count(hi) - ((lo < 2) ? 0 : LMO_prime_count(lo-1)); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
36
|
|
|
|
|
|
UV prime_count_approx(UV n) |
353
|
|
|
|
|
|
|
{ |
354
|
36
|
100
|
|
|
|
|
if (n < 3000000) return segment_prime_count(2, n); |
355
|
25
|
|
|
|
|
|
return (UV) (RiemannR( (long double) n ) + 0.5 ); |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
/* See http://numbers.computation.free.fr/Constants/Primes/twin.pdf, page 5 */ |
359
|
|
|
|
|
|
|
/* Upper limit is in Wu, Acta Arith 114 (2004). 4.48857*x/(log(x)*log(x) */ |
360
|
623
|
|
|
|
|
|
UV twin_prime_count_approx(UV n) |
361
|
|
|
|
|
|
|
{ |
362
|
|
|
|
|
|
|
/* Best would be another estimate for n < ~ 5000 */ |
363
|
623
|
100
|
|
|
|
|
if (n < 2000) return twin_prime_count(3,n); |
364
|
|
|
|
|
|
|
{ |
365
|
|
|
|
|
|
|
/* Sebah and Gourdon 2002 */ |
366
|
216
|
|
|
|
|
|
const long double two_C2 = 1.32032363169373914785562422L; |
367
|
216
|
|
|
|
|
|
const long double two_over_log_two = 2.8853900817779268147198494L; |
368
|
216
|
|
|
|
|
|
long double ln = (long double) n; |
369
|
216
|
|
|
|
|
|
long double logn = logl(ln); |
370
|
216
|
|
|
|
|
|
long double li2 = Ei(logn) + two_over_log_two-ln/logn; |
371
|
|
|
|
|
|
|
/* try to minimize MSE */ |
372
|
216
|
100
|
|
|
|
|
if (n < 32000000) { |
373
|
|
|
|
|
|
|
long double fm; |
374
|
86
|
50
|
|
|
|
|
if (n < 4000) fm = 0.2952; |
375
|
86
|
100
|
|
|
|
|
else if (n < 8000) fm = 0.3152; |
376
|
85
|
50
|
|
|
|
|
else if (n < 16000) fm = 0.3090; |
377
|
85
|
100
|
|
|
|
|
else if (n < 32000) fm = 0.3096; |
378
|
70
|
100
|
|
|
|
|
else if (n < 64000) fm = 0.3100; |
379
|
56
|
100
|
|
|
|
|
else if (n < 128000) fm = 0.3089; |
380
|
41
|
50
|
|
|
|
|
else if (n < 256000) fm = 0.3099; |
381
|
41
|
100
|
|
|
|
|
else if (n < 600000) fm = .3091 + (n-256000) * (.3056-.3091) / (600000-256000); |
382
|
22
|
50
|
|
|
|
|
else if (n < 1000000) fm = .3062 + (n-600000) * (.3042-.3062) / (1000000-600000); |
383
|
22
|
50
|
|
|
|
|
else if (n < 4000000) fm = .3067 + (n-1000000) * (.3041-.3067) / (4000000-1000000); |
384
|
22
|
50
|
|
|
|
|
else if (n <16000000) fm = .3033 + (n-4000000) * (.2983-.3033) / (16000000-4000000); |
385
|
0
|
|
|
|
|
|
else fm = .2980 + (n-16000000) * (.2965-.2980) / (32000000-16000000); |
386
|
86
|
|
|
|
|
|
li2 *= fm * logl(12+logn); |
387
|
|
|
|
|
|
|
} |
388
|
216
|
|
|
|
|
|
return (UV) (two_C2 * li2 + 0.5L); |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
16469
|
|
|
|
|
|
UV prime_count_lower(UV n) |
393
|
|
|
|
|
|
|
{ |
394
|
|
|
|
|
|
|
long double fn, fl1, fl2, lower, a; |
395
|
|
|
|
|
|
|
|
396
|
16469
|
100
|
|
|
|
|
if (n < 33000) return segment_prime_count(2, n); |
397
|
|
|
|
|
|
|
|
398
|
998
|
|
|
|
|
|
fn = (long double) n; |
399
|
998
|
|
|
|
|
|
fl1 = logl(n); |
400
|
998
|
|
|
|
|
|
fl2 = fl1 * fl1; |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
/* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.6 |
403
|
|
|
|
|
|
|
* show variations of this. */ |
404
|
|
|
|
|
|
|
|
405
|
998
|
100
|
|
|
|
|
if (n <= 300000) { /* Quite accurate and avoids calling Li for speed. */ |
406
|
142
|
100
|
|
|
|
|
a = (n < 70200) ? 947 : (n < 176000) ? 904 : 829; |
|
|
100
|
|
|
|
|
|
407
|
142
|
|
|
|
|
|
lower = fn / (fl1 - 1 - 1/fl1 - 2.85/fl2 - 13.15/(fl1*fl2) + a/(fl2*fl2)); |
408
|
856
|
100
|
|
|
|
|
} else if (n < UVCONST(4000000000)) { |
409
|
|
|
|
|
|
|
/* Loose enough that FP differences in Li(n) should be ok. */ |
410
|
837
|
|
|
|
|
|
a = (n < 88783) ? 4.0L |
411
|
1674
|
50
|
|
|
|
|
: (n < 300000) ? -3.0L |
412
|
1674
|
50
|
|
|
|
|
: (n < 303000) ? 5.0L |
413
|
1674
|
50
|
|
|
|
|
: (n < 1100000) ? -7.0L |
414
|
1384
|
100
|
|
|
|
|
: (n < 4500000) ? -37.0L |
415
|
581
|
100
|
|
|
|
|
: (n < 10200000) ? -70.0L |
416
|
42
|
100
|
|
|
|
|
: (n < 36900000) ? -53.0L |
417
|
15
|
100
|
|
|
|
|
: (n < 38100000) ? -29.0L |
418
|
7
|
50
|
|
|
|
|
: -84.0L; |
419
|
837
|
|
|
|
|
|
lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 2.50L/fl1 + a/fl2); |
420
|
19
|
100
|
|
|
|
|
} else if (fn < 1e19) { /* Büthe 2015 1.9 1511.02032v1.pdf */ |
421
|
17
|
|
|
|
|
|
lower = Li(fn) - (sqrtl(fn)/fl1) * (1.94L + 3.88L/fl1 + 27.57L/fl2); |
422
|
|
|
|
|
|
|
} else { /* Büthe 2014 v3 7.2 1410.7015v3.pdf */ |
423
|
2
|
|
|
|
|
|
lower = Li(fn) - fl1*sqrtl(fn)/25.132741228718345907701147L; |
424
|
|
|
|
|
|
|
} |
425
|
998
|
|
|
|
|
|
return (UV) ceill(lower); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
typedef struct { |
429
|
|
|
|
|
|
|
UV thresh; |
430
|
|
|
|
|
|
|
float aval; |
431
|
|
|
|
|
|
|
} thresh_t; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
static const thresh_t _upper_thresh[] = { |
434
|
|
|
|
|
|
|
{ 59000, 2.48 }, |
435
|
|
|
|
|
|
|
{ 355991, 2.54 }, |
436
|
|
|
|
|
|
|
{ 3550000, 2.51 }, |
437
|
|
|
|
|
|
|
{ 3560000, 2.49 }, |
438
|
|
|
|
|
|
|
{ 5000000, 2.48 }, |
439
|
|
|
|
|
|
|
{ 8000000, 2.47 }, |
440
|
|
|
|
|
|
|
{ 13000000, 2.46 }, |
441
|
|
|
|
|
|
|
{ 18000000, 2.45 }, |
442
|
|
|
|
|
|
|
{ 31000000, 2.44 }, |
443
|
|
|
|
|
|
|
{ 41000000, 2.43 }, |
444
|
|
|
|
|
|
|
{ 48000000, 2.42 }, |
445
|
|
|
|
|
|
|
{ 119000000, 2.41 }, |
446
|
|
|
|
|
|
|
{ 182000000, 2.40 }, |
447
|
|
|
|
|
|
|
{ 192000000, 2.395 }, |
448
|
|
|
|
|
|
|
{ 213000000, 2.390 }, |
449
|
|
|
|
|
|
|
{ 271000000, 2.385 }, |
450
|
|
|
|
|
|
|
{ 322000000, 2.380 }, |
451
|
|
|
|
|
|
|
{ 400000000, 2.375 }, |
452
|
|
|
|
|
|
|
{ 510000000, 2.370 }, |
453
|
|
|
|
|
|
|
{ 682000000, 2.367 }, |
454
|
|
|
|
|
|
|
{ UVCONST(2953652287), 2.362 } |
455
|
|
|
|
|
|
|
}; |
456
|
|
|
|
|
|
|
#define NUPPER_THRESH (sizeof(_upper_thresh)/sizeof(_upper_thresh[0])) |
457
|
|
|
|
|
|
|
|
458
|
4309
|
|
|
|
|
|
UV prime_count_upper(UV n) |
459
|
|
|
|
|
|
|
{ |
460
|
|
|
|
|
|
|
int i; |
461
|
|
|
|
|
|
|
long double fn, fl1, fl2, upper, a; |
462
|
|
|
|
|
|
|
|
463
|
4309
|
100
|
|
|
|
|
if (n < 33000) return segment_prime_count(2, n); |
464
|
|
|
|
|
|
|
|
465
|
582
|
|
|
|
|
|
fn = (long double) n; |
466
|
582
|
|
|
|
|
|
fl1 = logl(n); |
467
|
582
|
|
|
|
|
|
fl2 = fl1 * fl1; |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
/* Axler 2014: https://arxiv.org/abs/1409.1780 (v7 2016), Cor 3.5 |
470
|
|
|
|
|
|
|
* |
471
|
|
|
|
|
|
|
* upper = fn/(fl1-1.0L-1.0L/fl1-3.35L/fl2-12.65L/(fl2*fl1)-89.6L/(fl2*fl2)); |
472
|
|
|
|
|
|
|
* return (UV) floorl(upper); |
473
|
|
|
|
|
|
|
*/ |
474
|
|
|
|
|
|
|
|
475
|
582
|
100
|
|
|
|
|
if (BITS_PER_WORD == 32 || fn <= 821800000.0) { /* Dusart 2010, page 2 */ |
476
|
1839
|
50
|
|
|
|
|
for (i = 0; i < (int)NUPPER_THRESH; i++) |
477
|
1839
|
100
|
|
|
|
|
if (n < _upper_thresh[i].thresh) |
478
|
561
|
|
|
|
|
|
break; |
479
|
561
|
50
|
|
|
|
|
a = (i < (int)NUPPER_THRESH) ? _upper_thresh[i].aval : 2.334L; |
480
|
561
|
|
|
|
|
|
upper = fn/fl1 * (1.0L + 1.0L/fl1 + a/fl2); |
481
|
21
|
100
|
|
|
|
|
} else if (fn < 1e19) { /* Büthe 2015 1.10 Skewes number lower limit */ |
482
|
19
|
100
|
|
|
|
|
a = (fn < 1100000000.0) ? 0.032 /* Empirical */ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
483
|
|
|
|
|
|
|
: (fn < 10010000000.0) ? 0.027 /* Empirical */ |
484
|
|
|
|
|
|
|
: (fn < 101260000000.0) ? 0.021 /* Empirical */ |
485
|
|
|
|
|
|
|
: 0.0; |
486
|
19
|
|
|
|
|
|
upper = Li(fn) - a * fl1*sqrtl(fn)/25.132741228718345907701147L; |
487
|
|
|
|
|
|
|
} else { /* Büthe 2014 7.4 */ |
488
|
2
|
|
|
|
|
|
upper = Li(fn) + fl1*sqrtl(fn)/25.132741228718345907701147L; |
489
|
|
|
|
|
|
|
} |
490
|
582
|
|
|
|
|
|
return (UV) floorl(upper); |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
3004
|
|
|
|
|
|
static void simple_nth_limits(UV *lo, UV *hi, long double n, long double logn, long double loglogn) { |
494
|
3004
|
100
|
|
|
|
|
const long double a = (n < 228) ? .6483 : (n < 948) ? .8032 : (n < 2195) ? .8800 : (n < 39017) ? .9019 : .9484; |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
495
|
3004
|
|
|
|
|
|
*lo = n * (logn + loglogn - 1.0 + ((loglogn-2.10)/logn)); |
496
|
3004
|
|
|
|
|
|
*hi = n * (logn + loglogn - a); |
497
|
3004
|
50
|
|
|
|
|
if (*hi < *lo) *hi = MPU_MAX_PRIME; |
498
|
3004
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
/* The nth prime will be less or equal to this number */ |
501
|
2927
|
|
|
|
|
|
UV nth_prime_upper(UV n) |
502
|
|
|
|
|
|
|
{ |
503
|
|
|
|
|
|
|
long double fn, flogn, flog2n, upper; |
504
|
|
|
|
|
|
|
|
505
|
2927
|
100
|
|
|
|
|
if (n < NPRIMES_SMALL) |
506
|
429
|
|
|
|
|
|
return primes_small[n]; |
507
|
|
|
|
|
|
|
|
508
|
2498
|
|
|
|
|
|
fn = (long double) n; |
509
|
2498
|
|
|
|
|
|
flogn = logl(n); |
510
|
2498
|
|
|
|
|
|
flog2n = logl(flogn); /* Note distinction between log_2(n) and log^2(n) */ |
511
|
|
|
|
|
|
|
|
512
|
2498
|
100
|
|
|
|
|
if (n < 688383) { |
513
|
|
|
|
|
|
|
UV lo,hi; |
514
|
2406
|
|
|
|
|
|
simple_nth_limits(&lo, &hi, fn, flogn, flog2n); |
515
|
18360
|
100
|
|
|
|
|
while (lo < hi) { |
516
|
15954
|
|
|
|
|
|
UV mid = lo + (hi-lo)/2; |
517
|
15954
|
100
|
|
|
|
|
if (prime_count_lower(mid) < n) lo = mid+1; |
518
|
8133
|
|
|
|
|
|
else hi = mid; |
519
|
|
|
|
|
|
|
} |
520
|
2406
|
|
|
|
|
|
return lo; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
/* Dusart 2010 page 2 */ |
524
|
92
|
|
|
|
|
|
upper = fn * (flogn + flog2n - 1.0 + ((flog2n-2.00)/flogn)); |
525
|
92
|
100
|
|
|
|
|
if (n >= 46254381) { |
526
|
|
|
|
|
|
|
/* Axler 2017 http://arxiv.org/pdf/1706.03651.pdf Corollary 1.2 */ |
527
|
12
|
|
|
|
|
|
upper -= fn * ((flog2n*flog2n-6*flog2n+10.667)/(2*flogn*flogn)); |
528
|
80
|
100
|
|
|
|
|
} else if (n >= 8009824) { |
529
|
|
|
|
|
|
|
/* Axler 2013 page viii Korollar G */ |
530
|
61
|
|
|
|
|
|
upper -= fn * ((flog2n*flog2n-6*flog2n+10.273)/(2*flogn*flogn)); |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
92
|
50
|
|
|
|
|
if (upper >= (long double)UV_MAX) { |
534
|
0
|
0
|
|
|
|
|
if (n <= MPU_MAX_PRIME_IDX) return MPU_MAX_PRIME; |
535
|
0
|
|
|
|
|
|
croak("nth_prime_upper(%"UVuf") overflow", n); |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
92
|
|
|
|
|
|
return (UV) floorl(upper); |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
/* The nth prime will be greater than or equal to this number */ |
542
|
1313
|
|
|
|
|
|
UV nth_prime_lower(UV n) |
543
|
|
|
|
|
|
|
{ |
544
|
|
|
|
|
|
|
double fn, flogn, flog2n, lower; |
545
|
|
|
|
|
|
|
|
546
|
1313
|
100
|
|
|
|
|
if (n < NPRIMES_SMALL) |
547
|
638
|
|
|
|
|
|
return primes_small[n]; |
548
|
|
|
|
|
|
|
|
549
|
675
|
|
|
|
|
|
fn = (double) n; |
550
|
675
|
|
|
|
|
|
flogn = log(n); |
551
|
675
|
|
|
|
|
|
flog2n = log(flogn); |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
/* For small values, do a binary search on the inverse prime count */ |
554
|
675
|
100
|
|
|
|
|
if (n < 2000000) { |
555
|
|
|
|
|
|
|
UV lo,hi; |
556
|
598
|
|
|
|
|
|
simple_nth_limits(&lo, &hi, fn, flogn, flog2n); |
557
|
4392
|
100
|
|
|
|
|
while (lo < hi) { |
558
|
3794
|
|
|
|
|
|
UV mid = lo + (hi-lo)/2; |
559
|
3794
|
100
|
|
|
|
|
if (prime_count_upper(mid) < n) lo = mid+1; |
560
|
1870
|
|
|
|
|
|
else hi = mid; |
561
|
|
|
|
|
|
|
} |
562
|
598
|
|
|
|
|
|
return lo; |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
{ /* Axler 2017 http://arxiv.org/pdf/1706.03651.pdf Corollary 1.4 */ |
566
|
77
|
100
|
|
|
|
|
double b1 = (n < 56000000) ? 11.200 : 11.508; |
567
|
77
|
|
|
|
|
|
lower = fn * (flogn + flog2n-1.0 + ((flog2n-2.00)/flogn) - ((flog2n*flog2n-6*flog2n+b1)/(2*flogn*flogn))); |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
|
570
|
77
|
|
|
|
|
|
return (UV) ceill(lower); |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
25
|
|
|
|
|
|
UV nth_prime_approx(UV n) |
574
|
|
|
|
|
|
|
{ |
575
|
25
|
100
|
|
|
|
|
return (n < NPRIMES_SMALL) ? primes_small[n] : inverse_R(n); |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
|
579
|
6626
|
|
|
|
|
|
UV nth_prime(UV n) |
580
|
|
|
|
|
|
|
{ |
581
|
|
|
|
|
|
|
const unsigned char* cache_sieve; |
582
|
|
|
|
|
|
|
unsigned char* segment; |
583
|
|
|
|
|
|
|
UV upper_limit, segbase, segment_size, p, count, target; |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
/* If very small, return the table entry */ |
586
|
6626
|
100
|
|
|
|
|
if (n < NPRIMES_SMALL) |
587
|
5538
|
|
|
|
|
|
return primes_small[n]; |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
/* Determine a bound on the nth prime. We know it comes before this. */ |
590
|
1088
|
|
|
|
|
|
upper_limit = nth_prime_upper(n); |
591
|
1088
|
50
|
|
|
|
|
MPUassert(upper_limit > 0, "nth_prime got an upper limit of 0"); |
592
|
1088
|
|
|
|
|
|
p = count = 0; |
593
|
1088
|
|
|
|
|
|
target = n-3; |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
/* For relatively small values, generate a sieve and count the results. |
596
|
|
|
|
|
|
|
* |
597
|
|
|
|
|
|
|
* For larger values, compute an approximate low estimate, use our fast |
598
|
|
|
|
|
|
|
* prime count, then segment sieve forwards or backwards for the rest. |
599
|
|
|
|
|
|
|
*/ |
600
|
1088
|
100
|
|
|
|
|
if (upper_limit <= get_prime_cache(0, 0) || upper_limit <= 32*1024*30) { |
|
|
100
|
|
|
|
|
|
601
|
|
|
|
|
|
|
/* Generate a sieve and count. */ |
602
|
1066
|
|
|
|
|
|
segment_size = get_prime_cache(upper_limit, &cache_sieve) / 30; |
603
|
|
|
|
|
|
|
/* Count up everything in the cached sieve. */ |
604
|
2132
|
50
|
|
|
|
|
if (segment_size > 0) |
605
|
1066
|
|
|
|
|
|
count += count_segment_maxcount(cache_sieve, 0, segment_size, target, &p); |
606
|
|
|
|
|
|
|
release_prime_cache(cache_sieve); |
607
|
|
|
|
|
|
|
} else { |
608
|
|
|
|
|
|
|
/* A binary search on RiemannR is nice, but ends up either often being |
609
|
|
|
|
|
|
|
* being higher (requiring going backwards) or biased and then far too |
610
|
|
|
|
|
|
|
* low. Using the inverse Li is easier and more consistent. */ |
611
|
22
|
|
|
|
|
|
UV lower_limit = inverse_li(n); |
612
|
|
|
|
|
|
|
/* For even better performance, add in half the usual correction, which |
613
|
|
|
|
|
|
|
* will get us even closer, so even less sieving required. However, it |
614
|
|
|
|
|
|
|
* is now possible to get a result higher than the value, so we'll need |
615
|
|
|
|
|
|
|
* to handle that case. It still ends up being a better deal than R, |
616
|
|
|
|
|
|
|
* given that we don't have a fast backward sieve. */ |
617
|
22
|
|
|
|
|
|
lower_limit += inverse_li(isqrt(n))/4; |
618
|
22
|
|
|
|
|
|
segment_size = lower_limit / 30; |
619
|
22
|
|
|
|
|
|
lower_limit = 30 * segment_size - 1; |
620
|
22
|
|
|
|
|
|
count = prime_count(2,lower_limit); |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
/* printf("We've estimated %lu too %s.\n", (count>n)?count-n:n-count, (count>n)?"FAR":"little"); */ |
623
|
|
|
|
|
|
|
/* printf("Our limit %lu %s a prime\n", lower_limit, is_prime(lower_limit) ? "is" : "is not"); */ |
624
|
|
|
|
|
|
|
|
625
|
22
|
100
|
|
|
|
|
if (count >= n) { /* Too far. Walk backwards */ |
626
|
3
|
50
|
|
|
|
|
if (is_prime(lower_limit)) count--; |
627
|
7
|
100
|
|
|
|
|
for (p = 0; p <= (count-n); p++) |
628
|
4
|
|
|
|
|
|
lower_limit = prev_prime(lower_limit); |
629
|
3
|
|
|
|
|
|
return lower_limit; |
630
|
|
|
|
|
|
|
} |
631
|
19
|
|
|
|
|
|
count -= 3; |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
/* Make sure the segment siever won't have to keep resieving. */ |
634
|
19
|
|
|
|
|
|
prime_precalc(isqrt(upper_limit)); |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
1085
|
100
|
|
|
|
|
if (count == target) |
638
|
1066
|
|
|
|
|
|
return p; |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
/* Start segment sieving. Get memory to sieve into. */ |
641
|
19
|
|
|
|
|
|
segbase = segment_size; |
642
|
19
|
|
|
|
|
|
segment = get_prime_segment(&segment_size); |
643
|
|
|
|
|
|
|
|
644
|
38
|
100
|
|
|
|
|
while (count < target) { |
645
|
|
|
|
|
|
|
/* Limit the segment size if we know the answer comes earlier */ |
646
|
19
|
100
|
|
|
|
|
if ( (30*(segbase+segment_size)+29) > upper_limit ) |
647
|
18
|
|
|
|
|
|
segment_size = (upper_limit - segbase*30 + 30) / 30; |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
/* Do the actual sieving in the range */ |
650
|
19
|
|
|
|
|
|
sieve_segment(segment, segbase, segbase + segment_size-1); |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
/* Count up everything in this segment */ |
653
|
19
|
|
|
|
|
|
count += count_segment_maxcount(segment, 30*segbase, segment_size, target-count, &p); |
654
|
|
|
|
|
|
|
|
655
|
19
|
50
|
|
|
|
|
if (count < target) |
656
|
0
|
|
|
|
|
|
segbase += segment_size; |
657
|
|
|
|
|
|
|
} |
658
|
19
|
|
|
|
|
|
release_prime_segment(segment); |
659
|
19
|
50
|
|
|
|
|
MPUassert(count == target, "nth_prime got incorrect count"); |
660
|
6626
|
|
|
|
|
|
return ( (segbase*30) + p ); |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
/******************************************************************************/ |
664
|
|
|
|
|
|
|
/* TWIN PRIMES */ |
665
|
|
|
|
|
|
|
/******************************************************************************/ |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
#if BITS_PER_WORD < 64 |
668
|
|
|
|
|
|
|
static const UV twin_steps[] = |
669
|
|
|
|
|
|
|
{58980,48427,45485,43861,42348,41457,40908,39984,39640,39222, |
670
|
|
|
|
|
|
|
373059,353109,341253,332437,326131,320567,315883,312511,309244, |
671
|
|
|
|
|
|
|
2963535,2822103,2734294,2673728, |
672
|
|
|
|
|
|
|
}; |
673
|
|
|
|
|
|
|
static const unsigned int twin_num_exponents = 3; |
674
|
|
|
|
|
|
|
static const unsigned int twin_last_mult = 4; /* 4000M */ |
675
|
|
|
|
|
|
|
#else |
676
|
|
|
|
|
|
|
static const UV twin_steps[] = |
677
|
|
|
|
|
|
|
{58980,48427,45485,43861,42348,41457,40908,39984,39640,39222, |
678
|
|
|
|
|
|
|
373059,353109,341253,332437,326131,320567,315883,312511,309244, |
679
|
|
|
|
|
|
|
2963535,2822103,2734294,2673728,2626243,2585752,2554015,2527034,2501469, |
680
|
|
|
|
|
|
|
24096420,23046519,22401089,21946975,21590715,21300632,21060884,20854501,20665634, |
681
|
|
|
|
|
|
|
199708605,191801047,186932018,183404596,180694619,178477447,176604059,174989299,173597482, |
682
|
|
|
|
|
|
|
1682185723,1620989842,1583071291,1555660927,1534349481,1517031854,1502382532,1489745250, 1478662752, |
683
|
|
|
|
|
|
|
14364197903,13879821868,13578563641,13361034187,13191416949,13053013447,12936030624,12835090276, 12746487898, |
684
|
|
|
|
|
|
|
124078078589,120182602778,117753842540,115995331742,114622738809,113499818125,112551549250,111732637241,111012321565, |
685
|
|
|
|
|
|
|
1082549061370,1050759497170,1030883829367,1016473645857,1005206830409,995980796683,988183329733,981441437376,975508027029, |
686
|
|
|
|
|
|
|
9527651328494, 9264843314051, 9100153493509, 8980561036751, 8886953365929, 8810223086411, 8745329823109, 8689179566509, 8639748641098, |
687
|
|
|
|
|
|
|
84499489470819, 82302056642520, 80922166953330, 79918799449753, 79132610984280, 78487688897426, 77941865286827, 77469296499217, 77053075040105, |
688
|
|
|
|
|
|
|
754527610498466, 735967887462370, 724291736697048, |
689
|
|
|
|
|
|
|
}; |
690
|
|
|
|
|
|
|
static const unsigned int twin_num_exponents = 12; |
691
|
|
|
|
|
|
|
static const unsigned int twin_last_mult = 4; /* 4e19 */ |
692
|
|
|
|
|
|
|
#endif |
693
|
|
|
|
|
|
|
|
694
|
418
|
|
|
|
|
|
UV twin_prime_count(UV beg, UV end) |
695
|
|
|
|
|
|
|
{ |
696
|
|
|
|
|
|
|
unsigned char* segment; |
697
|
418
|
|
|
|
|
|
UV sum = 0; |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
/* First use the tables of #e# from 1e7 to 2e16. */ |
700
|
418
|
100
|
|
|
|
|
if (beg <= 3 && end >= 10000000) { |
|
|
100
|
|
|
|
|
|
701
|
6
|
|
|
|
|
|
UV mult, exp, step = 0, base = 10000000; |
702
|
40
|
50
|
|
|
|
|
for (exp = 0; exp < twin_num_exponents && end >= base; exp++) { |
|
|
100
|
|
|
|
|
|
703
|
312
|
100
|
|
|
|
|
for (mult = 1; mult < 10 && end >= mult*base; mult++) { |
|
|
100
|
|
|
|
|
|
704
|
278
|
|
|
|
|
|
sum += twin_steps[step++]; |
705
|
278
|
|
|
|
|
|
beg = mult*base; |
706
|
278
|
50
|
|
|
|
|
if (exp == twin_num_exponents-1 && mult >= twin_last_mult) break; |
|
|
0
|
|
|
|
|
|
707
|
|
|
|
|
|
|
} |
708
|
34
|
|
|
|
|
|
base *= 10; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
} |
711
|
418
|
100
|
|
|
|
|
if (beg <= 3 && end >= 3) sum++; |
|
|
50
|
|
|
|
|
|
712
|
418
|
100
|
|
|
|
|
if (beg <= 5 && end >= 5) sum++; |
|
|
50
|
|
|
|
|
|
713
|
418
|
100
|
|
|
|
|
if (beg < 11) beg = 7; |
714
|
418
|
50
|
|
|
|
|
if (beg <= end) { |
715
|
|
|
|
|
|
|
/* Make end points odd */ |
716
|
418
|
|
|
|
|
|
beg |= 1; |
717
|
418
|
|
|
|
|
|
end = (end-1) | 1; |
718
|
|
|
|
|
|
|
/* Cheesy way of counting the partial-byte edges */ |
719
|
5392
|
100
|
|
|
|
|
while ((beg % 30) != 1) { |
720
|
4974
|
100
|
|
|
|
|
if (is_prime(beg) && is_prime(beg+2) && beg <= end) sum++; |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
721
|
4974
|
|
|
|
|
|
beg += 2; |
722
|
|
|
|
|
|
|
} |
723
|
3264
|
100
|
|
|
|
|
while ((end % 30) != 29) { |
724
|
2861
|
100
|
|
|
|
|
if (is_prime(end) && is_prime(end+2) && beg <= end) sum++; |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
725
|
2861
|
100
|
|
|
|
|
end -= 2; if (beg > end) break; |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
} |
728
|
418
|
100
|
|
|
|
|
if (beg <= end) { |
729
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
730
|
403
|
|
|
|
|
|
void* ctx = start_segment_primes(beg, end, &segment); |
731
|
806
|
100
|
|
|
|
|
while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
732
|
403
|
|
|
|
|
|
UV bytes = seg_high/30 - seg_low/30 + 1; |
733
|
|
|
|
|
|
|
unsigned char s; |
734
|
403
|
|
|
|
|
|
const unsigned char* sp = segment; |
735
|
403
|
|
|
|
|
|
const unsigned char* const spend = segment + bytes - 1; |
736
|
53716
|
100
|
|
|
|
|
while (sp < spend) { |
737
|
53313
|
|
|
|
|
|
s = *sp++; |
738
|
53313
|
100
|
|
|
|
|
if (!(s & 0x0C)) sum++; |
739
|
53313
|
100
|
|
|
|
|
if (!(s & 0x30)) sum++; |
740
|
53313
|
100
|
|
|
|
|
if (!(s & 0x80) && !(*sp & 0x01)) sum++; |
|
|
100
|
|
|
|
|
|
741
|
|
|
|
|
|
|
} |
742
|
403
|
|
|
|
|
|
s = *sp; |
743
|
403
|
100
|
|
|
|
|
if (!(s & 0x0C)) sum++; |
744
|
403
|
100
|
|
|
|
|
if (!(s & 0x30)) sum++; |
745
|
403
|
100
|
|
|
|
|
if (!(s & 0x80) && is_prime(seg_high+2)) sum++; |
|
|
100
|
|
|
|
|
|
746
|
|
|
|
|
|
|
} |
747
|
403
|
|
|
|
|
|
end_segment_primes(ctx); |
748
|
|
|
|
|
|
|
} |
749
|
418
|
|
|
|
|
|
return sum; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
|
752
|
54
|
|
|
|
|
|
UV nth_twin_prime(UV n) |
753
|
|
|
|
|
|
|
{ |
754
|
|
|
|
|
|
|
unsigned char* segment; |
755
|
|
|
|
|
|
|
double dend; |
756
|
54
|
|
|
|
|
|
UV nth = 0; |
757
|
|
|
|
|
|
|
UV beg, end; |
758
|
|
|
|
|
|
|
|
759
|
54
|
100
|
|
|
|
|
if (n < 6) { |
760
|
6
|
|
|
|
|
|
switch (n) { |
761
|
0
|
|
|
|
|
|
case 0: nth = 0; break; |
762
|
1
|
|
|
|
|
|
case 1: nth = 3; break; |
763
|
1
|
|
|
|
|
|
case 2: nth = 5; break; |
764
|
1
|
|
|
|
|
|
case 3: nth =11; break; |
765
|
1
|
|
|
|
|
|
case 4: nth =17; break; |
766
|
|
|
|
|
|
|
case 5: |
767
|
2
|
|
|
|
|
|
default: nth =29; break; |
768
|
|
|
|
|
|
|
} |
769
|
6
|
|
|
|
|
|
return nth; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
48
|
|
|
|
|
|
end = UV_MAX - 16; |
773
|
48
|
|
|
|
|
|
dend = 800.0 + 1.01L * (double)nth_twin_prime_approx(n); |
774
|
48
|
50
|
|
|
|
|
if (dend < (double)end) end = (UV) dend; |
775
|
|
|
|
|
|
|
|
776
|
48
|
|
|
|
|
|
beg = 2; |
777
|
48
|
50
|
|
|
|
|
if (n > 58980) { /* Use twin_prime_count tables to accelerate if possible */ |
778
|
0
|
|
|
|
|
|
UV mult, exp, step = 0, base = 10000000; |
779
|
0
|
0
|
|
|
|
|
for (exp = 0; exp < twin_num_exponents && end >= base; exp++) { |
|
|
0
|
|
|
|
|
|
780
|
0
|
0
|
|
|
|
|
for (mult = 1; mult < 10 && n > twin_steps[step]; mult++) { |
|
|
0
|
|
|
|
|
|
781
|
0
|
|
|
|
|
|
n -= twin_steps[step++]; |
782
|
0
|
|
|
|
|
|
beg = mult*base; |
783
|
0
|
0
|
|
|
|
|
if (exp == twin_num_exponents-1 && mult >= twin_last_mult) break; |
|
|
0
|
|
|
|
|
|
784
|
|
|
|
|
|
|
} |
785
|
0
|
|
|
|
|
|
base *= 10; |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
} |
788
|
48
|
50
|
|
|
|
|
if (beg == 2) { beg = 31; n -= 5; } |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
{ |
791
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
792
|
48
|
|
|
|
|
|
void* ctx = start_segment_primes(beg, end, &segment); |
793
|
96
|
100
|
|
|
|
|
while (n && next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
|
|
50
|
|
|
|
|
|
794
|
48
|
|
|
|
|
|
UV p, bytes = seg_high/30 - seg_low/30 + 1; |
795
|
48
|
|
|
|
|
|
UV s = ((UV)segment[0]) << 8; |
796
|
4428
|
50
|
|
|
|
|
for (p = 0; p < bytes; p++) { |
797
|
4428
|
|
|
|
|
|
s >>= 8; |
798
|
4428
|
50
|
|
|
|
|
if (p+1 < bytes) s |= (((UV)segment[p+1]) << 8); |
799
|
0
|
0
|
|
|
|
|
else if (!is_prime(seg_high+2)) s |= 0xFF00; |
800
|
4428
|
100
|
|
|
|
|
if (!(s & 0x000C) && !--n) { nth=seg_base+p*30+11; break; } |
|
|
100
|
|
|
|
|
|
801
|
4409
|
100
|
|
|
|
|
if (!(s & 0x0030) && !--n) { nth=seg_base+p*30+17; break; } |
|
|
100
|
|
|
|
|
|
802
|
4396
|
100
|
|
|
|
|
if (!(s & 0x0180) && !--n) { nth=seg_base+p*30+29; break; } |
|
|
100
|
|
|
|
|
|
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
} |
805
|
48
|
|
|
|
|
|
end_segment_primes(ctx); |
806
|
|
|
|
|
|
|
} |
807
|
54
|
|
|
|
|
|
return nth; |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
|
810
|
58
|
|
|
|
|
|
UV nth_twin_prime_approx(UV n) |
811
|
|
|
|
|
|
|
{ |
812
|
58
|
|
|
|
|
|
long double fn = (long double) n; |
813
|
58
|
|
|
|
|
|
long double flogn = logl(n); |
814
|
58
|
|
|
|
|
|
long double fnlog2n = fn * flogn * flogn; |
815
|
|
|
|
|
|
|
UV lo, hi; |
816
|
|
|
|
|
|
|
|
817
|
58
|
100
|
|
|
|
|
if (n < 6) |
818
|
1
|
|
|
|
|
|
return nth_twin_prime(n); |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
/* Binary search on the TPC estimate. |
821
|
|
|
|
|
|
|
* Good results require that the TPC estimate is both fast and accurate. |
822
|
|
|
|
|
|
|
* These bounds are good for the actual nth_twin_prime values. |
823
|
|
|
|
|
|
|
*/ |
824
|
57
|
|
|
|
|
|
lo = (UV) (0.9 * fnlog2n); |
825
|
57
|
50
|
|
|
|
|
hi = (UV) ( (n >= 1e16) ? (1.04 * fnlog2n) : |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
826
|
57
|
|
|
|
|
|
(n >= 1e13) ? (1.10 * fnlog2n) : |
827
|
59
|
|
|
|
|
|
(n >= 1e7 ) ? (1.31 * fnlog2n) : |
828
|
5
|
|
|
|
|
|
(n >= 1200) ? (1.70 * fnlog2n) : |
829
|
50
|
|
|
|
|
|
(2.3 * fnlog2n + 5) ); |
830
|
57
|
50
|
|
|
|
|
if (hi <= lo) hi = UV_MAX; |
831
|
673
|
100
|
|
|
|
|
while (lo < hi) { |
832
|
616
|
|
|
|
|
|
UV mid = lo + (hi-lo)/2; |
833
|
616
|
100
|
|
|
|
|
if (twin_prime_count_approx(mid) < fn) lo = mid+1; |
834
|
287
|
|
|
|
|
|
else hi = mid; |
835
|
|
|
|
|
|
|
} |
836
|
57
|
|
|
|
|
|
return lo; |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
/******************************************************************************/ |
840
|
|
|
|
|
|
|
/* SUMS */ |
841
|
|
|
|
|
|
|
/******************************************************************************/ |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
/* The fastest way to compute the sum of primes is using a combinatorial |
844
|
|
|
|
|
|
|
* algorithm such as Deleglise 2012. Since this code is purely native, |
845
|
|
|
|
|
|
|
* it will overflow a 64-bit result quite quickly. Hence a relatively small |
846
|
|
|
|
|
|
|
* table plus sum over sieved primes works quite well. |
847
|
|
|
|
|
|
|
* |
848
|
|
|
|
|
|
|
* The following info is useful if we ever return 128-bit results or for a |
849
|
|
|
|
|
|
|
* GMP implementation. |
850
|
|
|
|
|
|
|
* |
851
|
|
|
|
|
|
|
* Combinatorial sum of primes < n. Call with phisum(n, isqrt(n)). |
852
|
|
|
|
|
|
|
* Needs optimization, either caching, Lehmer, or LMO. |
853
|
|
|
|
|
|
|
* http://mathoverflow.net/questions/81443/fastest-algorithm-to-compute-the-sum-of-primes |
854
|
|
|
|
|
|
|
* http://www.ams.org/journals/mcom/2009-78-268/S0025-5718-09-02249-2/S0025-5718-09-02249-2.pdf |
855
|
|
|
|
|
|
|
* http://mathematica.stackexchange.com/questions/80291/efficient-way-to-sum-all-the-primes-below-n-million-in-mathematica |
856
|
|
|
|
|
|
|
* Deleglise 2012, page 27, simple Meissel: |
857
|
|
|
|
|
|
|
* y = x^1/3 |
858
|
|
|
|
|
|
|
* a = Pi(y) |
859
|
|
|
|
|
|
|
* Pi_f(x) = phisum(x,a) + Pi_f(y) - 1 - P_2(x,a) |
860
|
|
|
|
|
|
|
* P_2(x,a) = sum prime p : y < p <= sqrt(x) of f(p) * Pi_f(x/p) - |
861
|
|
|
|
|
|
|
* sum prime p : y < p <= sqrt(x) of f(p) * Pi_f(p-1) |
862
|
|
|
|
|
|
|
*/ |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
static const unsigned char byte_sum[256] = |
865
|
|
|
|
|
|
|
{120,119,113,112,109,108,102,101,107,106,100,99,96,95,89,88,103,102,96,95,92, |
866
|
|
|
|
|
|
|
91,85,84,90,89,83,82,79,78,72,71,101,100,94,93,90,89,83,82,88,87,81,80,77, |
867
|
|
|
|
|
|
|
76,70,69,84,83,77,76,73,72,66,65,71,70,64,63,60,59,53,52,97,96,90,89,86,85, |
868
|
|
|
|
|
|
|
79,78,84,83,77,76,73,72,66,65,80,79,73,72,69,68,62,61,67,66,60,59,56,55,49, |
869
|
|
|
|
|
|
|
48,78,77,71,70,67,66,60,59,65,64,58,57,54,53,47,46,61,60,54,53,50,49,43,42, |
870
|
|
|
|
|
|
|
48,47,41,40,37,36,30,29,91,90,84,83,80,79,73,72,78,77,71,70,67,66,60,59,74, |
871
|
|
|
|
|
|
|
73,67,66,63,62,56,55,61,60,54,53,50,49,43,42,72,71,65,64,61,60,54,53,59,58, |
872
|
|
|
|
|
|
|
52,51,48,47,41,40,55,54,48,47,44,43,37,36,42,41,35,34,31,30,24,23,68,67,61, |
873
|
|
|
|
|
|
|
60,57,56,50,49,55,54,48,47,44,43,37,36,51,50,44,43,40,39,33,32,38,37,31,30, |
874
|
|
|
|
|
|
|
27,26,20,19,49,48,42,41,38,37,31,30,36,35,29,28,25,24,18,17,32,31,25,24,21, |
875
|
|
|
|
|
|
|
20,14,13,19,18,12,11,8,7,1,0}; |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
878
|
|
|
|
|
|
|
/* We have a much more limited range, so use a fixed interval. We should be |
879
|
|
|
|
|
|
|
* able to get any 64-bit sum in under a half-second. */ |
880
|
|
|
|
|
|
|
static const UV sum_table_2e8[] = |
881
|
|
|
|
|
|
|
{1075207199997324,3071230303170813,4990865886639877,6872723092050268,8729485610396243,10566436676784677,12388862798895708,14198556341669206,15997206121881531,17783028661796383,19566685687136351,21339485298848693,23108856419719148, |
882
|
|
|
|
|
|
|
24861364231151903,26619321031799321,28368484289421890,30110050320271201,31856321671656548,33592089385327108,35316546074029522,37040262208390735,38774260466286299,40490125006181147,42207686658844380,43915802985817228,45635106002281013, |
883
|
|
|
|
|
|
|
47337822860157465,49047713696453759,50750666660265584,52449748364487290,54152689180758005,55832433395290183,57540651847418233,59224867245128289,60907462954737468,62597192477315868,64283665223856098,65961576139329367,67641982565760928, |
884
|
|
|
|
|
|
|
69339211720915217,71006044680007261,72690896543747616,74358564592509127,76016548794894677,77694517638354266,79351385193517953,81053240048141953,82698120948724835,84380724263091726,86028655116421543,87679091888973563,89348007111430334, |
885
|
|
|
|
|
|
|
90995902774878695,92678527127292212,94313220293410120,95988730932107432,97603162494502485,99310622699836698,100935243057337310,102572075478649557,104236362884241550,105885045921116836,107546170993472638,109163445284201278, |
886
|
|
|
|
|
|
|
110835950755374921,112461991135144669,114116351921245042,115740770232532531,117408250788520189,119007914428335965,120652479429703269,122317415246500401,123951466213858688,125596789655927842,127204379051939418,128867944265073217, |
887
|
|
|
|
|
|
|
130480037123800711,132121840147764197,133752985360747726,135365954823762234,137014594650995101,138614165689305879,140269121741383097,141915099618762647,143529289083557618,145146413750649432,146751434858695468,148397902396643807, |
888
|
|
|
|
|
|
|
149990139346918801,151661665434334577,153236861034424304,154885985064643097,156500983286383741,158120868946747299,159735201435796748,161399264792716319,162999489977602579,164566400448130092,166219688860475191,167836981098849796, |
889
|
|
|
|
|
|
|
169447127305804401,171078187147848898,172678849082290997,174284436375728242,175918609754056455,177525046501311788,179125593738290153,180765176633753371,182338473848291683,183966529541155489,185585792988238475,187131988176321434, |
890
|
|
|
|
|
|
|
188797837140841381,190397649440649965,191981841583560122,193609739194967419,195166830650558070,196865965063113041,198400070713177440,200057161591648721,201621899486413406,203238279253414934,204790684829891896,206407676204061001, |
891
|
|
|
|
|
|
|
208061050481364659,209641606658938873,211192088300183855,212855420483750498,214394145510853736,216036806225784861,217628995137940563,219277567478725189,220833877268454872,222430818525363309,224007307616922530,225640739533952807, |
892
|
|
|
|
|
|
|
227213096159236934,228853318075566255,230401824696558125,231961445347821085,233593317860593895,235124654760954338,236777716068869769,238431514923528303,239965003913481640,241515977959535845,243129874530821395}; |
893
|
|
|
|
|
|
|
#define N_SUM_TABLE (sizeof(sum_table_2e8)/sizeof(sum_table_2e8[0])) |
894
|
|
|
|
|
|
|
#endif |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
/* Add n to the double-word hi,lo */ |
897
|
|
|
|
|
|
|
#define ADD_128(hi, lo, n) \ |
898
|
|
|
|
|
|
|
do { UV _n = n; \ |
899
|
|
|
|
|
|
|
if (_n > (UV_MAX-lo)) { hi++; if (hi == 0) overflow = 1; } \ |
900
|
|
|
|
|
|
|
lo += _n; } while (0) |
901
|
|
|
|
|
|
|
#define SET_128(hi, lo, n) \ |
902
|
|
|
|
|
|
|
do { hi = (UV) (((n) >> 64) & UV_MAX); \ |
903
|
|
|
|
|
|
|
lo = (UV) (((n) ) & UV_MAX); } while (0) |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
/* Legendre method for prime sum */ |
906
|
0
|
|
|
|
|
|
int sum_primes128(UV n, UV *hi_sum, UV *lo_sum) { |
907
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 && HAVE_UINT128 |
908
|
|
|
|
|
|
|
uint128_t *V, *S; |
909
|
0
|
|
|
|
|
|
UV j, k, r = isqrt(n), r2 = r + n/(r+1); |
910
|
|
|
|
|
|
|
|
911
|
0
|
0
|
|
|
|
|
New(0, V, r2+1, uint128_t); |
912
|
0
|
0
|
|
|
|
|
New(0, S, r2+1, uint128_t); |
913
|
0
|
0
|
|
|
|
|
for (k = 0; k <= r2; k++) { |
914
|
0
|
0
|
|
|
|
|
uint128_t v = (k <= r) ? k : n/(r2-k+1); |
915
|
0
|
|
|
|
|
|
V[k] = v; |
916
|
0
|
|
|
|
|
|
S[k] = (v*(v+1))/2 - 1; |
917
|
|
|
|
|
|
|
} |
918
|
|
|
|
|
|
|
|
919
|
0
|
0
|
|
|
|
|
START_DO_FOR_EACH_PRIME(2, r) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
920
|
0
|
|
|
|
|
|
uint128_t a, b, sp = S[p-1], p2 = ((uint128_t)p) * p; |
921
|
0
|
0
|
|
|
|
|
for (j = k-1; j > 1 && V[j] >= p2; j--) { |
|
|
0
|
|
|
|
|
|
922
|
0
|
|
|
|
|
|
a = V[j], b = a/p; |
923
|
0
|
0
|
|
|
|
|
if (a > r) a = r2 - n/a + 1; |
924
|
0
|
0
|
|
|
|
|
if (b > r) b = r2 - n/b + 1; |
925
|
0
|
|
|
|
|
|
S[a] -= p * (S[b] - sp); /* sp = sum of primes less than p */ |
926
|
|
|
|
|
|
|
} |
927
|
0
|
|
|
|
|
|
} END_DO_FOR_EACH_PRIME; |
928
|
0
|
|
|
|
|
|
SET_128(*hi_sum, *lo_sum, S[r2]); |
929
|
0
|
|
|
|
|
|
Safefree(V); |
930
|
0
|
|
|
|
|
|
Safefree(S); |
931
|
0
|
|
|
|
|
|
return 1; |
932
|
|
|
|
|
|
|
#else |
933
|
|
|
|
|
|
|
return 0; |
934
|
|
|
|
|
|
|
#endif |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
|
937
|
1003
|
|
|
|
|
|
int sum_primes(UV low, UV high, UV *return_sum) { |
938
|
1003
|
|
|
|
|
|
UV sum = 0; |
939
|
1003
|
|
|
|
|
|
int overflow = 0; |
940
|
|
|
|
|
|
|
|
941
|
1003
|
100
|
|
|
|
|
if ((low <= 2) && (high >= 2)) sum += 2; |
|
|
50
|
|
|
|
|
|
942
|
1003
|
100
|
|
|
|
|
if ((low <= 3) && (high >= 3)) sum += 3; |
|
|
100
|
|
|
|
|
|
943
|
1003
|
100
|
|
|
|
|
if ((low <= 5) && (high >= 5)) sum += 5; |
|
|
100
|
|
|
|
|
|
944
|
1003
|
100
|
|
|
|
|
if (low < 7) low = 7; |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
/* If we know the range will overflow, return now */ |
947
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
948
|
1003
|
100
|
|
|
|
|
if (low == 7 && high >= 29505444491) return 0; |
|
|
50
|
|
|
|
|
|
949
|
1003
|
50
|
|
|
|
|
if (low >= 1e10 && (high-low) >= 32e9) return 0; |
|
|
0
|
|
|
|
|
|
950
|
1003
|
50
|
|
|
|
|
if (low >= 1e13 && (high-low) >= 5e7) return 0; |
|
|
0
|
|
|
|
|
|
951
|
|
|
|
|
|
|
#else |
952
|
|
|
|
|
|
|
if (low == 7 && high >= 323381) return 0; |
953
|
|
|
|
|
|
|
#endif |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
#if 1 && BITS_PER_WORD == 64 /* Tables */ |
956
|
1003
|
100
|
|
|
|
|
if (low == 7 && high >= 2e8) { |
|
|
50
|
|
|
|
|
|
957
|
|
|
|
|
|
|
UV step; |
958
|
0
|
0
|
|
|
|
|
for (step = 1; high >= (step * 2e8) && step < N_SUM_TABLE; step++) { |
|
|
0
|
|
|
|
|
|
959
|
0
|
|
|
|
|
|
sum += sum_table_2e8[step-1]; |
960
|
0
|
|
|
|
|
|
low = step * 2e8; |
961
|
|
|
|
|
|
|
} |
962
|
|
|
|
|
|
|
} |
963
|
|
|
|
|
|
|
#endif |
964
|
|
|
|
|
|
|
|
965
|
1003
|
100
|
|
|
|
|
if (low <= high) { |
966
|
|
|
|
|
|
|
unsigned char* segment; |
967
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
968
|
998
|
|
|
|
|
|
void* ctx = start_segment_primes(low, high, &segment); |
969
|
1996
|
50
|
|
|
|
|
while (!overflow && next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
|
|
100
|
|
|
|
|
|
970
|
998
|
|
|
|
|
|
UV bytes = seg_high/30 - seg_low/30 + 1; |
971
|
|
|
|
|
|
|
unsigned char s; |
972
|
998
|
|
|
|
|
|
unsigned char* sp = segment; |
973
|
998
|
|
|
|
|
|
unsigned char* const spend = segment + bytes - 1; |
974
|
998
|
|
|
|
|
|
UV i, p, pbase = 30*(seg_low/30); |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
/* Clear primes before and after our range */ |
977
|
998
|
|
|
|
|
|
p = pbase; |
978
|
2005
|
50
|
|
|
|
|
for (i = 0; i < 8 && p+wheel30[i] < low; i++) |
|
|
100
|
|
|
|
|
|
979
|
1007
|
100
|
|
|
|
|
if ( (*sp & (1<
|
980
|
3
|
|
|
|
|
|
*sp |= (1 << i); |
981
|
|
|
|
|
|
|
|
982
|
998
|
|
|
|
|
|
p = 30*(seg_high/30); |
983
|
8982
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) |
984
|
7984
|
100
|
|
|
|
|
if ( (*spend & (1< high ) |
|
|
100
|
|
|
|
|
|
985
|
2459
|
|
|
|
|
|
*spend |= (1 << i); |
986
|
|
|
|
|
|
|
|
987
|
29639
|
100
|
|
|
|
|
while (sp <= spend) { |
988
|
28641
|
|
|
|
|
|
s = *sp++; |
989
|
28641
|
50
|
|
|
|
|
if (sum < (UV_MAX >> 3) && pbase < (UV_MAX >> 5)) { |
|
|
50
|
|
|
|
|
|
990
|
|
|
|
|
|
|
/* sum block of 8 all at once */ |
991
|
28641
|
|
|
|
|
|
sum += pbase * byte_zeros[s] + byte_sum[s]; |
992
|
|
|
|
|
|
|
} else { |
993
|
|
|
|
|
|
|
/* sum block of 8, checking for overflow at each step */ |
994
|
0
|
0
|
|
|
|
|
for (i = 0; i < byte_zeros[s]; i++) { |
995
|
0
|
0
|
|
|
|
|
if (sum+pbase < sum) overflow = 1; |
996
|
0
|
|
|
|
|
|
sum += pbase; |
997
|
|
|
|
|
|
|
} |
998
|
0
|
0
|
|
|
|
|
if (sum+byte_sum[s] < sum) overflow = 1; |
999
|
0
|
|
|
|
|
|
sum += byte_sum[s]; |
1000
|
0
|
0
|
|
|
|
|
if (overflow) break; |
1001
|
|
|
|
|
|
|
} |
1002
|
28641
|
|
|
|
|
|
pbase += 30; |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
} |
1005
|
998
|
|
|
|
|
|
end_segment_primes(ctx); |
1006
|
|
|
|
|
|
|
} |
1007
|
1003
|
50
|
|
|
|
|
if (!overflow && return_sum != 0) *return_sum = sum; |
|
|
50
|
|
|
|
|
|
1008
|
1003
|
|
|
|
|
|
return !overflow; |
1009
|
|
|
|
|
|
|
} |
1010
|
|
|
|
|
|
|
|
1011
|
526
|
|
|
|
|
|
double ramanujan_sa_gn(UV un) |
1012
|
|
|
|
|
|
|
{ |
1013
|
526
|
|
|
|
|
|
long double n = (long double) un; |
1014
|
526
|
|
|
|
|
|
long double logn = logl(n); |
1015
|
526
|
|
|
|
|
|
long double log2 = logl(2); |
1016
|
|
|
|
|
|
|
|
1017
|
526
|
|
|
|
|
|
return (double)( (logn + logl(logn) - log2 - 0.5) / (log2 + 0.5) ); |
1018
|
|
|
|
|
|
|
} |