| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include |
|
2
|
|
|
|
|
|
|
#include |
|
3
|
|
|
|
|
|
|
#include |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#define FUNC_isqrt 1 |
|
6
|
|
|
|
|
|
|
#include "ptypes.h" |
|
7
|
|
|
|
|
|
|
#include "constants.h" |
|
8
|
|
|
|
|
|
|
#include "util.h" |
|
9
|
|
|
|
|
|
|
#include "cache.h" |
|
10
|
|
|
|
|
|
|
#include "sieve.h" |
|
11
|
|
|
|
|
|
|
#include "prime_sums.h" |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
/******************************************************************************/ |
|
14
|
|
|
|
|
|
|
/* SUMS */ |
|
15
|
|
|
|
|
|
|
/******************************************************************************/ |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
/* As an aside, good information about bounds and approximations can be |
|
18
|
|
|
|
|
|
|
* found in Axler (2019) "On the sum of the first n prime numbers" |
|
19
|
|
|
|
|
|
|
* https://jtnb.centre-mersenne.org/item/10.5802/jtnb.1081.pdf |
|
20
|
|
|
|
|
|
|
*/ |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
static const unsigned char byte_zeros[256] = |
|
23
|
|
|
|
|
|
|
{8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3, |
|
24
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
|
25
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
|
26
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
|
27
|
|
|
|
|
|
|
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2, |
|
28
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
|
29
|
|
|
|
|
|
|
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1, |
|
30
|
|
|
|
|
|
|
5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0}; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
/* The fastest way to compute the sum of primes is using a combinatorial |
|
33
|
|
|
|
|
|
|
* algorithm such as Deleglise-Rivat or Gourdon. This is what Kim Walisch's |
|
34
|
|
|
|
|
|
|
* primesum program does. Note that one quickly needs 128-bit or larger |
|
35
|
|
|
|
|
|
|
* storage, as the sums grow rapidly. |
|
36
|
|
|
|
|
|
|
* |
|
37
|
|
|
|
|
|
|
* We are using much simpler methods. Performance at small sizes is also a |
|
38
|
|
|
|
|
|
|
* consideration. Using tables combined with summing over sieved primes can |
|
39
|
|
|
|
|
|
|
* work well with small input sizes. |
|
40
|
|
|
|
|
|
|
*/ |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
/* Simplified Legendre method giving pisum(n) for n <= 65535 or 4294967295. */ |
|
43
|
|
|
|
|
|
|
|
|
44
|
19
|
|
|
|
|
|
UV sum_primes64(UV n) { |
|
45
|
|
|
|
|
|
|
uint32_t *V, j, k, r, r2, p; |
|
46
|
|
|
|
|
|
|
UV *S, sum; |
|
47
|
|
|
|
|
|
|
|
|
48
|
19
|
50
|
|
|
|
|
if (n < 2 || (n >> (BITS_PER_WORD/2)) > 0) /* S[] will overflow */ |
|
|
|
100
|
|
|
|
|
|
|
49
|
7
|
|
|
|
|
|
return 0; |
|
50
|
12
|
|
|
|
|
|
r = isqrt(n); |
|
51
|
12
|
|
|
|
|
|
r2 = r + n/(r+1); |
|
52
|
|
|
|
|
|
|
|
|
53
|
12
|
|
|
|
|
|
New(0, V, r2+1, uint32_t); |
|
54
|
12
|
|
|
|
|
|
New(0, S, r2+1, UV); |
|
55
|
444200
|
100
|
|
|
|
|
for (k = 1; k <= r2; k++) { |
|
56
|
444188
|
100
|
|
|
|
|
UV v = (k <= r) ? k : n/(r2-k+1); |
|
57
|
444188
|
|
|
|
|
|
V[k] = v; |
|
58
|
444188
|
|
|
|
|
|
S[k] = ((v*(v-1))>>1) + (v-1); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
222096
|
100
|
|
|
|
|
for (p = 2; p <= r; p++) { |
|
62
|
222084
|
100
|
|
|
|
|
if (S[p] > S[p-1]) { /* For each prime p from 2 to r */ |
|
63
|
23284
|
|
|
|
|
|
UV sp = S[p-1], p2 = p*p; |
|
64
|
25830282
|
50
|
|
|
|
|
for (j = r2; j > 1 && V[j] >= p2; j--) { |
|
|
|
100
|
|
|
|
|
|
|
65
|
25806998
|
|
|
|
|
|
uint32_t a = V[j], b = a/p; |
|
66
|
25806998
|
100
|
|
|
|
|
if (a > r) a = r2 - n/a + 1; |
|
67
|
25806998
|
100
|
|
|
|
|
if (b > r) b = r2 - n/b + 1; |
|
68
|
25806998
|
|
|
|
|
|
S[a] -= (UV)p * (S[b] - sp); /* sp = sum of primes less than p */ |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
} |
|
72
|
12
|
|
|
|
|
|
sum = S[r2]; |
|
73
|
12
|
|
|
|
|
|
Safefree(V); |
|
74
|
12
|
|
|
|
|
|
Safefree(S); |
|
75
|
12
|
|
|
|
|
|
return sum; |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
/* Simplified Legendre method giving pisum(n) for any 64-bit input n, |
|
79
|
|
|
|
|
|
|
* assuming the uint128_t type is available. The result is returned as |
|
80
|
|
|
|
|
|
|
* two 64-bit results. */ |
|
81
|
|
|
|
|
|
|
|
|
82
|
1
|
|
|
|
|
|
bool sum_primes128(UV n, UV *hi_sum, UV *lo_sum) { |
|
83
|
|
|
|
|
|
|
#if HAVE_SUM_PRIMES128 |
|
84
|
|
|
|
|
|
|
uint128_t *S; |
|
85
|
|
|
|
|
|
|
UV *V, j, k, r, r2, p; |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
/* pisum(2^64-1) < 2^128-1, so no overflow issues */ |
|
88
|
1
|
|
|
|
|
|
r = isqrt(n); |
|
89
|
1
|
|
|
|
|
|
r2 = r + n/(r+1); |
|
90
|
|
|
|
|
|
|
|
|
91
|
1
|
50
|
|
|
|
|
New(0, V, r2+1, UV); |
|
92
|
1
|
50
|
|
|
|
|
New(0, S, r2+1, uint128_t); |
|
93
|
343543
|
100
|
|
|
|
|
for (k = 0; k <= r2; k++) { |
|
94
|
343542
|
100
|
|
|
|
|
UV v = (k <= r) ? k : n/(r2-k+1); |
|
95
|
343542
|
|
|
|
|
|
V[k] = v; |
|
96
|
343542
|
|
|
|
|
|
S[k] = ((uint128_t)v+1)/2 * (v|1) - 1; /* (v*(v+1))/2-1 */ |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
171771
|
100
|
|
|
|
|
for (p = 2; p <= r; p++) { |
|
100
|
171770
|
100
|
|
|
|
|
if (S[p] > S[p-1]) { /* For each prime p from 2 to r */ |
|
101
|
15648
|
|
|
|
|
|
uint128_t sp = S[p-1], p2 = ((uint128_t)p) * p; |
|
102
|
33779919
|
50
|
|
|
|
|
for (j = r2; j > 1 && V[j] >= p2; j--) { |
|
|
|
100
|
|
|
|
|
|
|
103
|
33764271
|
|
|
|
|
|
UV a = V[j], b = a/p; |
|
104
|
33764271
|
100
|
|
|
|
|
if (a > r) a = r2 - n/a + 1; |
|
105
|
33764271
|
100
|
|
|
|
|
if (b > r) b = r2 - n/b + 1; |
|
106
|
33764271
|
|
|
|
|
|
S[a] -= p * (S[b] - sp); /* sp = sum of primes less than p */ |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
} |
|
110
|
1
|
|
|
|
|
|
*hi_sum = (S[r2] >> 64) & UV_MAX; |
|
111
|
1
|
|
|
|
|
|
*lo_sum = (S[r2] ) & UV_MAX; |
|
112
|
1
|
|
|
|
|
|
Safefree(V); |
|
113
|
1
|
|
|
|
|
|
Safefree(S); |
|
114
|
1
|
|
|
|
|
|
return 1; |
|
115
|
|
|
|
|
|
|
#else |
|
116
|
|
|
|
|
|
|
return 0; |
|
117
|
|
|
|
|
|
|
#endif |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
/* sum primes in a 64-bit range using a sieving with table acceleration */ |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
static const unsigned char byte_sum[256] = |
|
124
|
|
|
|
|
|
|
{120,119,113,112,109,108,102,101,107,106,100,99,96,95,89,88,103,102,96,95,92, |
|
125
|
|
|
|
|
|
|
91,85,84,90,89,83,82,79,78,72,71,101,100,94,93,90,89,83,82,88,87,81,80,77, |
|
126
|
|
|
|
|
|
|
76,70,69,84,83,77,76,73,72,66,65,71,70,64,63,60,59,53,52,97,96,90,89,86,85, |
|
127
|
|
|
|
|
|
|
79,78,84,83,77,76,73,72,66,65,80,79,73,72,69,68,62,61,67,66,60,59,56,55,49, |
|
128
|
|
|
|
|
|
|
48,78,77,71,70,67,66,60,59,65,64,58,57,54,53,47,46,61,60,54,53,50,49,43,42, |
|
129
|
|
|
|
|
|
|
48,47,41,40,37,36,30,29,91,90,84,83,80,79,73,72,78,77,71,70,67,66,60,59,74, |
|
130
|
|
|
|
|
|
|
73,67,66,63,62,56,55,61,60,54,53,50,49,43,42,72,71,65,64,61,60,54,53,59,58, |
|
131
|
|
|
|
|
|
|
52,51,48,47,41,40,55,54,48,47,44,43,37,36,42,41,35,34,31,30,24,23,68,67,61, |
|
132
|
|
|
|
|
|
|
60,57,56,50,49,55,54,48,47,44,43,37,36,51,50,44,43,40,39,33,32,38,37,31,30, |
|
133
|
|
|
|
|
|
|
27,26,20,19,49,48,42,41,38,37,31,30,36,35,29,28,25,24,18,17,32,31,25,24,21, |
|
134
|
|
|
|
|
|
|
20,14,13,19,18,12,11,8,7,1,0}; |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
|
137
|
|
|
|
|
|
|
/* We have a much more limited range, so use a fixed interval. We should be |
|
138
|
|
|
|
|
|
|
* able to get any 64-bit sum in under a half-second. */ |
|
139
|
|
|
|
|
|
|
static const UV sum_table_2e8[] = |
|
140
|
|
|
|
|
|
|
{1075207199997324,3071230303170813,4990865886639877,6872723092050268,8729485610396243,10566436676784677,12388862798895708,14198556341669206,15997206121881531,17783028661796383,19566685687136351,21339485298848693,23108856419719148, |
|
141
|
|
|
|
|
|
|
24861364231151903,26619321031799321,28368484289421890,30110050320271201,31856321671656548,33592089385327108,35316546074029522,37040262208390735,38774260466286299,40490125006181147,42207686658844380,43915802985817228,45635106002281013, |
|
142
|
|
|
|
|
|
|
47337822860157465,49047713696453759,50750666660265584,52449748364487290,54152689180758005,55832433395290183,57540651847418233,59224867245128289,60907462954737468,62597192477315868,64283665223856098,65961576139329367,67641982565760928, |
|
143
|
|
|
|
|
|
|
69339211720915217,71006044680007261,72690896543747616,74358564592509127,76016548794894677,77694517638354266,79351385193517953,81053240048141953,82698120948724835,84380724263091726,86028655116421543,87679091888973563,89348007111430334, |
|
144
|
|
|
|
|
|
|
90995902774878695,92678527127292212,94313220293410120,95988730932107432,97603162494502485,99310622699836698,100935243057337310,102572075478649557,104236362884241550,105885045921116836,107546170993472638,109163445284201278, |
|
145
|
|
|
|
|
|
|
110835950755374921,112461991135144669,114116351921245042,115740770232532531,117408250788520189,119007914428335965,120652479429703269,122317415246500401,123951466213858688,125596789655927842,127204379051939418,128867944265073217, |
|
146
|
|
|
|
|
|
|
130480037123800711,132121840147764197,133752985360747726,135365954823762234,137014594650995101,138614165689305879,140269121741383097,141915099618762647,143529289083557618,145146413750649432,146751434858695468,148397902396643807, |
|
147
|
|
|
|
|
|
|
149990139346918801,151661665434334577,153236861034424304,154885985064643097,156500983286383741,158120868946747299,159735201435796748,161399264792716319,162999489977602579,164566400448130092,166219688860475191,167836981098849796, |
|
148
|
|
|
|
|
|
|
169447127305804401,171078187147848898,172678849082290997,174284436375728242,175918609754056455,177525046501311788,179125593738290153,180765176633753371,182338473848291683,183966529541155489,185585792988238475,187131988176321434, |
|
149
|
|
|
|
|
|
|
188797837140841381,190397649440649965,191981841583560122,193609739194967419,195166830650558070,196865965063113041,198400070713177440,200057161591648721,201621899486413406,203238279253414934,204790684829891896,206407676204061001, |
|
150
|
|
|
|
|
|
|
208061050481364659,209641606658938873,211192088300183855,212855420483750498,214394145510853736,216036806225784861,217628995137940563,219277567478725189,220833877268454872,222430818525363309,224007307616922530,225640739533952807, |
|
151
|
|
|
|
|
|
|
227213096159236934,228853318075566255,230401824696558125,231961445347821085,233593317860593895,235124654760954338,236777716068869769,238431514923528303,239965003913481640,241515977959535845,243129874530821395}; |
|
152
|
|
|
|
|
|
|
#define N_SUM_TABLE (sizeof(sum_table_2e8)/sizeof(sum_table_2e8[0])) |
|
153
|
|
|
|
|
|
|
#endif |
|
154
|
|
|
|
|
|
|
|
|
155
|
1030
|
|
|
|
|
|
bool sum_primes(UV low, UV high, UV *return_sum) { |
|
156
|
1030
|
|
|
|
|
|
UV sum = 0; |
|
157
|
1030
|
|
|
|
|
|
bool overflow = 0; |
|
158
|
|
|
|
|
|
|
|
|
159
|
1030
|
100
|
|
|
|
|
if (low <= 2 && high >= 100000) { |
|
|
|
100
|
|
|
|
|
|
|
160
|
19
|
|
|
|
|
|
*return_sum = sum_primes64(high); |
|
161
|
19
|
100
|
|
|
|
|
if (*return_sum != 0) |
|
162
|
12
|
|
|
|
|
|
return 1; |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
/* TODO: performance: more cases where using sum_primes64 is faster. */ |
|
165
|
|
|
|
|
|
|
|
|
166
|
1018
|
100
|
|
|
|
|
if ((low <= 2) && (high >= 2)) sum += 2; |
|
|
|
100
|
|
|
|
|
|
|
167
|
1018
|
100
|
|
|
|
|
if ((low <= 3) && (high >= 3)) sum += 3; |
|
|
|
100
|
|
|
|
|
|
|
168
|
1018
|
100
|
|
|
|
|
if ((low <= 5) && (high >= 5)) sum += 5; |
|
|
|
100
|
|
|
|
|
|
|
169
|
1018
|
100
|
|
|
|
|
if (low < 7) low = 7; |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
/* If we know the range will overflow, return now */ |
|
172
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
|
173
|
1018
|
100
|
|
|
|
|
if (low == 7 && high >= 29505444491) return 0; |
|
|
|
100
|
|
|
|
|
|
|
174
|
1017
|
50
|
|
|
|
|
if (low >= 1e10 && (high-low) >= 32e9) return 0; |
|
|
|
0
|
|
|
|
|
|
|
175
|
1017
|
50
|
|
|
|
|
if (low >= 1e13 && (high-low) >= 5e7) return 0; |
|
|
|
0
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
#else |
|
177
|
|
|
|
|
|
|
if (low == 7 && high >= 323381) return 0; |
|
178
|
|
|
|
|
|
|
#endif |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
#if 1 && BITS_PER_WORD == 64 /* Tables */ |
|
181
|
1017
|
100
|
|
|
|
|
if (low == 7 && high >= 2e8) { |
|
|
|
100
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
UV step; |
|
183
|
448
|
100
|
|
|
|
|
for (step = 1; high >= (step * 2e8) && step < N_SUM_TABLE; step++) { |
|
|
|
100
|
|
|
|
|
|
|
184
|
442
|
|
|
|
|
|
sum += sum_table_2e8[step-1]; |
|
185
|
442
|
|
|
|
|
|
low = step * 2e8; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
#endif |
|
189
|
|
|
|
|
|
|
|
|
190
|
1017
|
100
|
|
|
|
|
if (low <= high) { |
|
191
|
|
|
|
|
|
|
unsigned char* segment; |
|
192
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
|
193
|
1010
|
|
|
|
|
|
void* ctx = start_segment_primes(low, high, &segment); |
|
194
|
2245
|
50
|
|
|
|
|
while (!overflow && next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
|
|
|
100
|
|
|
|
|
|
|
195
|
1235
|
|
|
|
|
|
UV bytes = seg_high/30 - seg_low/30 + 1; |
|
196
|
|
|
|
|
|
|
unsigned char s; |
|
197
|
1235
|
|
|
|
|
|
unsigned char* sp = segment; |
|
198
|
1235
|
|
|
|
|
|
unsigned char* const spend = segment + bytes - 1; |
|
199
|
1235
|
|
|
|
|
|
UV i, p, pbase = 30*(seg_low/30); |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
/* Clear primes before and after our range */ |
|
202
|
1235
|
|
|
|
|
|
p = pbase; |
|
203
|
2271
|
50
|
|
|
|
|
for (i = 0; i < 8 && p+wheel30[i] < low; i++) |
|
|
|
100
|
|
|
|
|
|
|
204
|
1036
|
100
|
|
|
|
|
if ( (*sp & (1<
|
|
205
|
5
|
|
|
|
|
|
*sp |= (1 << i); |
|
206
|
|
|
|
|
|
|
|
|
207
|
1235
|
|
|
|
|
|
p = 30*(seg_high/30); |
|
208
|
11115
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) |
|
209
|
9880
|
100
|
|
|
|
|
if ( (*spend & (1< high ) |
|
|
|
100
|
|
|
|
|
|
|
210
|
2480
|
|
|
|
|
|
*spend |= (1 << i); |
|
211
|
|
|
|
|
|
|
|
|
212
|
20390020
|
100
|
|
|
|
|
while (sp <= spend) { |
|
213
|
20388785
|
|
|
|
|
|
s = *sp++; |
|
214
|
20388785
|
100
|
|
|
|
|
if (sum < (UV_MAX >> 3) && pbase < (UV_MAX >> 5)) { |
|
|
|
50
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
/* sum block of 8 all at once */ |
|
216
|
3192482
|
|
|
|
|
|
sum += pbase * byte_zeros[s] + byte_sum[s]; |
|
217
|
|
|
|
|
|
|
} else { |
|
218
|
|
|
|
|
|
|
/* sum block of 8, checking for overflow at each step */ |
|
219
|
38741186
|
100
|
|
|
|
|
for (i = 0; i < byte_zeros[s]; i++) { |
|
220
|
21544883
|
50
|
|
|
|
|
if (sum+pbase < sum) overflow = 1; |
|
221
|
21544883
|
|
|
|
|
|
sum += pbase; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
17196303
|
50
|
|
|
|
|
if (sum+byte_sum[s] < sum) overflow = 1; |
|
224
|
17196303
|
|
|
|
|
|
sum += byte_sum[s]; |
|
225
|
17196303
|
50
|
|
|
|
|
if (overflow) break; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
20388785
|
|
|
|
|
|
pbase += 30; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
} |
|
230
|
1010
|
|
|
|
|
|
end_segment_primes(ctx); |
|
231
|
|
|
|
|
|
|
} |
|
232
|
1017
|
50
|
|
|
|
|
if (!overflow && return_sum != 0) *return_sum = sum; |
|
|
|
50
|
|
|
|
|
|
|
233
|
1017
|
|
|
|
|
|
return !overflow; |
|
234
|
|
|
|
|
|
|
} |