| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include |
|
2
|
|
|
|
|
|
|
#include |
|
3
|
|
|
|
|
|
|
#include |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#define FUNC_popcnt 1 |
|
6
|
|
|
|
|
|
|
#include "ptypes.h" |
|
7
|
|
|
|
|
|
|
#include "cache.h" |
|
8
|
|
|
|
|
|
|
#include "sieve.h" |
|
9
|
|
|
|
|
|
|
#include "util.h" |
|
10
|
|
|
|
|
|
|
#include "lmo.h" |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
/* |
|
13
|
|
|
|
|
|
|
* Cache small counts directly using a uint32_t array. |
|
14
|
|
|
|
|
|
|
* Very fast, but space intensive. |
|
15
|
|
|
|
|
|
|
* |
|
16
|
|
|
|
|
|
|
* Cache larger counts using a base count + a single-word bit count. |
|
17
|
|
|
|
|
|
|
* |
|
18
|
|
|
|
|
|
|
* We used to use a binary search on a prime list, which is reasonable, |
|
19
|
|
|
|
|
|
|
* but the bit mask uses less memory and is faster on average. It also |
|
20
|
|
|
|
|
|
|
* easily allows larger sizes. Note: in 32-bit this isn't very efficient. |
|
21
|
|
|
|
|
|
|
* |
|
22
|
|
|
|
|
|
|
* If memory is a concern, we could switch to a base count every two words. |
|
23
|
|
|
|
|
|
|
*/ |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
typedef struct { |
|
26
|
|
|
|
|
|
|
uint16_t *count; |
|
27
|
|
|
|
|
|
|
uint32_t *bm_count; |
|
28
|
|
|
|
|
|
|
UV *bm_mask; |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
UV last_n; |
|
31
|
|
|
|
|
|
|
UV last_count_n; |
|
32
|
|
|
|
|
|
|
UV last_bmask_n; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
/* Statistics counting */ |
|
35
|
|
|
|
|
|
|
unsigned long nl_small; |
|
36
|
|
|
|
|
|
|
unsigned long nl_bmask; |
|
37
|
|
|
|
|
|
|
unsigned long nl_lmo; |
|
38
|
|
|
|
|
|
|
} pc_cache_t; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
76396
|
|
|
|
|
|
UV prime_count_cache_lookup(void* cobj, UV n) { |
|
42
|
76396
|
|
|
|
|
|
pc_cache_t *cache = (pc_cache_t*)cobj; |
|
43
|
|
|
|
|
|
|
|
|
44
|
76396
|
100
|
|
|
|
|
if (n <= 2) |
|
45
|
34
|
|
|
|
|
|
return (n==2); |
|
46
|
|
|
|
|
|
|
/* Look in the small direct cache. */ |
|
47
|
76362
|
100
|
|
|
|
|
if (n <= cache->last_count_n) { |
|
48
|
76173
|
|
|
|
|
|
cache->nl_small++; |
|
49
|
76173
|
|
|
|
|
|
return cache->count[(n-1)>>1]; |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
/* Look in bitmask */ |
|
52
|
189
|
50
|
|
|
|
|
if (n <= cache->last_bmask_n) { |
|
53
|
0
|
|
|
|
|
|
UV m = (n-1) >> 1; |
|
54
|
0
|
|
|
|
|
|
uint32_t idx = m / BITS_PER_WORD; |
|
55
|
0
|
|
|
|
|
|
uint32_t rem = m % BITS_PER_WORD; |
|
56
|
0
|
|
|
|
|
|
cache->nl_bmask++; |
|
57
|
0
|
|
|
|
|
|
return (UV)cache->bm_count[idx] + popcnt(cache->bm_mask[idx] >> (BITS_PER_WORD - 1 - rem)); |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
/* OK, call LMO/segment */ |
|
60
|
189
|
|
|
|
|
|
cache->nl_lmo++; |
|
61
|
189
|
|
|
|
|
|
return LMO_prime_count(n); |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
#if 0 |
|
66
|
|
|
|
|
|
|
static void _checkn(pc_cache_t *cache, UV n, UV count) { |
|
67
|
|
|
|
|
|
|
UV pc = prime_count_cache_lookup(cache, n); |
|
68
|
|
|
|
|
|
|
if (pc != count) |
|
69
|
|
|
|
|
|
|
croak(" pc cache [%lu] returned %lu instead of %lu\n", n, pc, count); |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
static void verify_cache(pc_cache_t *cache) { |
|
72
|
|
|
|
|
|
|
UV n = 3, c = 1, lastn = cache->last_n; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
_checkn(cache, 0, 0); |
|
75
|
|
|
|
|
|
|
_checkn(cache, 1, 0); |
|
76
|
|
|
|
|
|
|
_checkn(cache, 2, 1); |
|
77
|
|
|
|
|
|
|
START_DO_FOR_EACH_PRIME(3, next_prime(lastn)) { |
|
78
|
|
|
|
|
|
|
while (n < p) _checkn(cache, n++, c); |
|
79
|
|
|
|
|
|
|
_checkn(cache, n++, ++c); |
|
80
|
|
|
|
|
|
|
} END_DO_FOR_EACH_PRIME |
|
81
|
|
|
|
|
|
|
printf(" prime count cache verified to %lu complete\n", lastn); |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
static UV _bm_lookup(pc_cache_t *cache, UV n) { |
|
84
|
|
|
|
|
|
|
uint32_t m = (n-1) >> 1; |
|
85
|
|
|
|
|
|
|
uint32_t idx = m / BITS_PER_WORD; |
|
86
|
|
|
|
|
|
|
uint32_t rem = m % BITS_PER_WORD; |
|
87
|
|
|
|
|
|
|
return cache->bm_count[idx] + popcnt(cache->bm_mask[idx] >> (BITS_PER_WORD - 1 - rem)); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
#else |
|
90
|
|
|
|
|
|
|
#define verify_cache(cache) /* nothing */ |
|
91
|
|
|
|
|
|
|
#define _bm_lookup(cache,n) prime_count_cache_lookup(cache,n) |
|
92
|
|
|
|
|
|
|
#endif |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
|
95
|
42
|
|
|
|
|
|
void prime_count_cache_destroy(void* cobj) { |
|
96
|
42
|
|
|
|
|
|
pc_cache_t *cache = (pc_cache_t*)cobj; |
|
97
|
|
|
|
|
|
|
|
|
98
|
42
|
50
|
|
|
|
|
MPUverbose(2, " Prime Count Cache (max %lu):\n", (UV)cache->last_n); |
|
99
|
42
|
50
|
|
|
|
|
MPUverbose(2, " Small: %lu (%luk) Mask: %lu (%luk)\n", |
|
|
|
0
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
(unsigned long)cache->last_count_n, |
|
101
|
|
|
|
|
|
|
cache->last_count_n ? (unsigned long)(((cache->last_count_n-1)>>1)+1)*4/1024 : 0, |
|
102
|
|
|
|
|
|
|
(unsigned long)cache->last_bmask_n, |
|
103
|
|
|
|
|
|
|
(unsigned long) (sizeof(UV)+sizeof(uint32_t)) * (cache->last_bmask_n/(2*BITS_PER_WORD) + 1) / 1024); |
|
104
|
42
|
50
|
|
|
|
|
MPUverbose(2, " Lookups Small %lu Mask %lu LMO %lu\n", |
|
105
|
|
|
|
|
|
|
cache->nl_small, cache->nl_bmask, cache->nl_lmo); |
|
106
|
|
|
|
|
|
|
|
|
107
|
42
|
50
|
|
|
|
|
if (cache->count != 0) |
|
108
|
42
|
|
|
|
|
|
Safefree(cache->count); |
|
109
|
42
|
50
|
|
|
|
|
if (cache->bm_count != 0) |
|
110
|
0
|
|
|
|
|
|
Safefree(cache->bm_count); |
|
111
|
42
|
50
|
|
|
|
|
if (cache->bm_mask != 0) |
|
112
|
0
|
|
|
|
|
|
Safefree(cache->bm_mask); |
|
113
|
42
|
|
|
|
|
|
Safefree(cache); |
|
114
|
42
|
|
|
|
|
|
} |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
/* prime_count(LIM_SMALL) <= 65535 */ |
|
118
|
|
|
|
|
|
|
#define LIM_SMALL 821640 |
|
119
|
|
|
|
|
|
|
|
|
120
|
36
|
|
|
|
|
|
void* prime_count_cache_create(UV n) { |
|
121
|
|
|
|
|
|
|
pc_cache_t *cache; |
|
122
|
|
|
|
|
|
|
|
|
123
|
36
|
50
|
|
|
|
|
if (n < 5) n = 5; |
|
124
|
|
|
|
|
|
|
#if BITS_PER_WORD == 64 |
|
125
|
|
|
|
|
|
|
/* The prime count has to fit in a uint32_t, so must be < 104484802057 */ |
|
126
|
|
|
|
|
|
|
/* Further limit to ~ 3GB. */ |
|
127
|
36
|
50
|
|
|
|
|
if (n > UVCONST( 34359738367)) n = UVCONST( 34359738367); |
|
128
|
|
|
|
|
|
|
#endif |
|
129
|
36
|
|
|
|
|
|
prime_precalc(LIM_SMALL); |
|
130
|
|
|
|
|
|
|
|
|
131
|
36
|
|
|
|
|
|
Newz(0, cache, 1, pc_cache_t); /* Allocate cache object, everything zero */ |
|
132
|
36
|
|
|
|
|
|
cache->last_n = n; |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
/* Fill in small counts */ |
|
135
|
|
|
|
|
|
|
{ |
|
136
|
36
|
|
|
|
|
|
uint32_t const count_last_n = (n <= LIM_SMALL) ? n : LIM_SMALL; |
|
137
|
36
|
|
|
|
|
|
uint32_t const count_last_idx = (count_last_n-1) >> 1; |
|
138
|
36
|
|
|
|
|
|
uint32_t idx = 1; |
|
139
|
36
|
|
|
|
|
|
uint16_t cnt = 1, *counts; |
|
140
|
36
|
|
|
|
|
|
New(0, counts, count_last_idx+1, uint16_t); |
|
141
|
36
|
|
|
|
|
|
counts[0] = 1; |
|
142
|
29087
|
50
|
|
|
|
|
START_DO_FOR_EACH_PRIME(3, count_last_n) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
143
|
122707
|
100
|
|
|
|
|
while (idx < ((p-1)>>1)) counts[idx++] = cnt; |
|
144
|
29002
|
|
|
|
|
|
counts[idx++] = ++cnt; |
|
145
|
|
|
|
|
|
|
} END_DO_FOR_EACH_PRIME |
|
146
|
115
|
100
|
|
|
|
|
while (idx <= count_last_idx) counts[idx++] = cnt; |
|
147
|
|
|
|
|
|
|
|
|
148
|
36
|
|
|
|
|
|
cache->count = counts; |
|
149
|
36
|
|
|
|
|
|
cache->last_count_n = count_last_n; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
/* Fill in bitmask and base counts */ |
|
153
|
36
|
50
|
|
|
|
|
if (n > cache->last_count_n) { |
|
154
|
|
|
|
|
|
|
UV *mask; |
|
155
|
|
|
|
|
|
|
uint32_t *count, i; |
|
156
|
0
|
|
|
|
|
|
uint32_t words = (n / (2*BITS_PER_WORD)) + 1; /* 0-127=1, 128-255=2 */ |
|
157
|
0
|
|
|
|
|
|
Newz(0, count, words, uint32_t); |
|
158
|
0
|
|
|
|
|
|
Newz(0, mask, words, UV); |
|
159
|
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
mask[0] = UVCONST(15) << (BITS_PER_WORD-4); |
|
161
|
|
|
|
|
|
|
{ |
|
162
|
|
|
|
|
|
|
unsigned char* segment; |
|
163
|
|
|
|
|
|
|
UV seg_base, seg_low, seg_high; |
|
164
|
0
|
|
|
|
|
|
void* ctx = start_segment_primes(7, n, &segment); |
|
165
|
0
|
0
|
|
|
|
|
while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { |
|
166
|
0
|
0
|
|
|
|
|
START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
|
UV m = (p-1)>>1; |
|
168
|
0
|
|
|
|
|
|
uint32_t midx = m / BITS_PER_WORD; |
|
169
|
0
|
|
|
|
|
|
uint32_t mrem = m % BITS_PER_WORD; |
|
170
|
0
|
|
|
|
|
|
mask[midx] |= UVCONST(1) << (BITS_PER_WORD-1-mrem); |
|
171
|
0
|
|
|
|
|
|
END_DO_FOR_EACH_SIEVE_PRIME |
|
172
|
|
|
|
|
|
|
} |
|
173
|
0
|
|
|
|
|
|
end_segment_primes(ctx); |
|
174
|
|
|
|
|
|
|
} |
|
175
|
0
|
0
|
|
|
|
|
for (i = 1; i < words; i++) |
|
176
|
0
|
|
|
|
|
|
count[i] = count[i-1] + popcnt(mask[i-1]); |
|
177
|
|
|
|
|
|
|
|
|
178
|
0
|
|
|
|
|
|
cache->bm_mask = mask; |
|
179
|
0
|
|
|
|
|
|
cache->bm_count = count; |
|
180
|
0
|
|
|
|
|
|
cache->last_bmask_n = n; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
verify_cache(cache); |
|
183
|
36
|
|
|
|
|
|
return cache; |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
6
|
|
|
|
|
|
void* prime_count_cache_create_with_primes(const uint32_t *primes, uint32_t lastidx) { |
|
187
|
|
|
|
|
|
|
#if 0 /* Slower */ |
|
188
|
|
|
|
|
|
|
return prime_count_cache_create(primes[lastidx]); |
|
189
|
|
|
|
|
|
|
#else /* Faster, but so much code duplication.... */ |
|
190
|
|
|
|
|
|
|
pc_cache_t *cache; |
|
191
|
|
|
|
|
|
|
uint32_t i, n; |
|
192
|
|
|
|
|
|
|
|
|
193
|
6
|
50
|
|
|
|
|
MPUassert(primes != 0, "prime_count_cache_create called with null pointer"); |
|
194
|
6
|
50
|
|
|
|
|
if (lastidx <= 1) return prime_count_cache_create(5); |
|
195
|
6
|
50
|
|
|
|
|
if (lastidx > 203280221) lastidx = 203280221; |
|
196
|
|
|
|
|
|
|
|
|
197
|
6
|
|
|
|
|
|
Newz(0, cache, 1, pc_cache_t); /* Allocate cache object, everything zero */ |
|
198
|
6
|
|
|
|
|
|
cache->last_n = n = primes[lastidx]; |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
/* Fill in small counts */ |
|
201
|
|
|
|
|
|
|
{ |
|
202
|
6
|
|
|
|
|
|
uint32_t const count_last_n = (n <= LIM_SMALL) ? n : LIM_SMALL; |
|
203
|
6
|
|
|
|
|
|
uint32_t const count_last_idx = (count_last_n-1) >> 1; |
|
204
|
6
|
|
|
|
|
|
uint32_t idx = 1; |
|
205
|
6
|
|
|
|
|
|
uint16_t cnt = 1, *counts; |
|
206
|
6
|
|
|
|
|
|
New(0, counts, count_last_idx+1, uint16_t); |
|
207
|
6
|
|
|
|
|
|
counts[0] = 1; |
|
208
|
5278
|
100
|
|
|
|
|
for (i = 2; i <= lastidx; i++) { |
|
209
|
5272
|
|
|
|
|
|
uint32_t p = primes[i]; |
|
210
|
5272
|
50
|
|
|
|
|
if (p > count_last_n) break; |
|
211
|
20803
|
100
|
|
|
|
|
while (idx < ((p-1)>>1)) counts[idx++] = cnt; |
|
212
|
5272
|
|
|
|
|
|
counts[idx++] = ++cnt; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
6
|
50
|
|
|
|
|
while (idx <= count_last_idx) counts[idx++] = cnt; |
|
215
|
|
|
|
|
|
|
|
|
216
|
6
|
|
|
|
|
|
cache->count = counts; |
|
217
|
6
|
|
|
|
|
|
cache->last_count_n = count_last_n; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
/* Fill in bitmask and base counts */ |
|
221
|
6
|
50
|
|
|
|
|
if (n > cache->last_count_n) { |
|
222
|
|
|
|
|
|
|
UV *mask; |
|
223
|
|
|
|
|
|
|
uint32_t *count; |
|
224
|
0
|
|
|
|
|
|
uint32_t words = (n / (2*BITS_PER_WORD)) + 1; /* 0-127=1, 128-255=2 */ |
|
225
|
0
|
|
|
|
|
|
Newz(0, count, words, uint32_t); |
|
226
|
0
|
|
|
|
|
|
Newz(0, mask, words, UV); |
|
227
|
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
|
mask[0] = UVCONST(1) << (BITS_PER_WORD-1); |
|
229
|
0
|
0
|
|
|
|
|
for (i = 2; i <= lastidx; i++) { |
|
230
|
0
|
|
|
|
|
|
uint32_t p = primes[i]; |
|
231
|
0
|
|
|
|
|
|
uint32_t m = (p-1)>>1; |
|
232
|
0
|
|
|
|
|
|
uint32_t midx = m / BITS_PER_WORD; |
|
233
|
0
|
|
|
|
|
|
uint32_t mrem = m % BITS_PER_WORD; |
|
234
|
0
|
|
|
|
|
|
mask[midx] |= UVCONST(1) << (BITS_PER_WORD-1-mrem); |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
0
|
0
|
|
|
|
|
for (i = 1; i < words; i++) |
|
238
|
0
|
|
|
|
|
|
count[i] = count[i-1] + popcnt(mask[i-1]); |
|
239
|
|
|
|
|
|
|
|
|
240
|
0
|
|
|
|
|
|
cache->bm_mask = mask; |
|
241
|
0
|
|
|
|
|
|
cache->bm_count = count; |
|
242
|
0
|
|
|
|
|
|
cache->last_bmask_n = n; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
verify_cache(cache); |
|
245
|
6
|
|
|
|
|
|
return cache; |
|
246
|
|
|
|
|
|
|
#endif |
|
247
|
|
|
|
|
|
|
} |