File Coverage

prime_count_cache.c
Criterion Covered Total %
statement 63 103 61.1
branch 46 86 53.4
condition n/a
subroutine n/a
pod n/a
total 109 189 57.6


line stmt bran cond sub pod time code
1             #include
2             #include
3             #include
4              
5             #define FUNC_popcnt 1
6             #include "ptypes.h"
7             #include "cache.h"
8             #include "sieve.h"
9             #include "util.h"
10             #include "lmo.h"
11              
12             /*
13             * Cache small counts directly using a uint32_t array.
14             * Very fast, but space intensive.
15             *
16             * Cache larger counts using a base count + a single-word bit count.
17             *
18             * We used to use a binary search on a prime list, which is reasonable,
19             * but the bit mask uses less memory and is faster on average. It also
20             * easily allows larger sizes. Note: in 32-bit this isn't very efficient.
21             *
22             * If memory is a concern, we could switch to a base count every two words.
23             */
24              
25             typedef struct {
26             uint16_t *count;
27             uint32_t *bm_count;
28             UV *bm_mask;
29              
30             UV last_n;
31             UV last_count_n;
32             UV last_bmask_n;
33              
34             /* Statistics counting */
35             unsigned long nl_small;
36             unsigned long nl_bmask;
37             unsigned long nl_lmo;
38             } pc_cache_t;
39              
40              
41 76396           UV prime_count_cache_lookup(void* cobj, UV n) {
42 76396           pc_cache_t *cache = (pc_cache_t*)cobj;
43              
44 76396 100         if (n <= 2)
45 34           return (n==2);
46             /* Look in the small direct cache. */
47 76362 100         if (n <= cache->last_count_n) {
48 76173           cache->nl_small++;
49 76173           return cache->count[(n-1)>>1];
50             }
51             /* Look in bitmask */
52 189 50         if (n <= cache->last_bmask_n) {
53 0           UV m = (n-1) >> 1;
54 0           uint32_t idx = m / BITS_PER_WORD;
55 0           uint32_t rem = m % BITS_PER_WORD;
56 0           cache->nl_bmask++;
57 0           return (UV)cache->bm_count[idx] + popcnt(cache->bm_mask[idx] >> (BITS_PER_WORD - 1 - rem));
58             }
59             /* OK, call LMO/segment */
60 189           cache->nl_lmo++;
61 189           return LMO_prime_count(n);
62             }
63              
64              
65             #if 0
66             static void _checkn(pc_cache_t *cache, UV n, UV count) {
67             UV pc = prime_count_cache_lookup(cache, n);
68             if (pc != count)
69             croak(" pc cache [%lu] returned %lu instead of %lu\n", n, pc, count);
70             }
71             static void verify_cache(pc_cache_t *cache) {
72             UV n = 3, c = 1, lastn = cache->last_n;
73              
74             _checkn(cache, 0, 0);
75             _checkn(cache, 1, 0);
76             _checkn(cache, 2, 1);
77             START_DO_FOR_EACH_PRIME(3, next_prime(lastn)) {
78             while (n < p) _checkn(cache, n++, c);
79             _checkn(cache, n++, ++c);
80             } END_DO_FOR_EACH_PRIME
81             printf(" prime count cache verified to %lu complete\n", lastn);
82             }
83             static UV _bm_lookup(pc_cache_t *cache, UV n) {
84             uint32_t m = (n-1) >> 1;
85             uint32_t idx = m / BITS_PER_WORD;
86             uint32_t rem = m % BITS_PER_WORD;
87             return cache->bm_count[idx] + popcnt(cache->bm_mask[idx] >> (BITS_PER_WORD - 1 - rem));
88             }
89             #else
90             #define verify_cache(cache) /* nothing */
91             #define _bm_lookup(cache,n) prime_count_cache_lookup(cache,n)
92             #endif
93              
94              
95 42           void prime_count_cache_destroy(void* cobj) {
96 42           pc_cache_t *cache = (pc_cache_t*)cobj;
97              
98 42 50         MPUverbose(2, " Prime Count Cache (max %lu):\n", (UV)cache->last_n);
99 42 50         MPUverbose(2, " Small: %lu (%luk) Mask: %lu (%luk)\n",
    0          
100             (unsigned long)cache->last_count_n,
101             cache->last_count_n ? (unsigned long)(((cache->last_count_n-1)>>1)+1)*4/1024 : 0,
102             (unsigned long)cache->last_bmask_n,
103             (unsigned long) (sizeof(UV)+sizeof(uint32_t)) * (cache->last_bmask_n/(2*BITS_PER_WORD) + 1) / 1024);
104 42 50         MPUverbose(2, " Lookups Small %lu Mask %lu LMO %lu\n",
105             cache->nl_small, cache->nl_bmask, cache->nl_lmo);
106              
107 42 50         if (cache->count != 0)
108 42           Safefree(cache->count);
109 42 50         if (cache->bm_count != 0)
110 0           Safefree(cache->bm_count);
111 42 50         if (cache->bm_mask != 0)
112 0           Safefree(cache->bm_mask);
113 42           Safefree(cache);
114 42           }
115              
116              
117             /* prime_count(LIM_SMALL) <= 65535 */
118             #define LIM_SMALL 821640
119              
120 36           void* prime_count_cache_create(UV n) {
121             pc_cache_t *cache;
122              
123 36 50         if (n < 5) n = 5;
124             #if BITS_PER_WORD == 64
125             /* The prime count has to fit in a uint32_t, so must be < 104484802057 */
126             /* Further limit to ~ 3GB. */
127 36 50         if (n > UVCONST( 34359738367)) n = UVCONST( 34359738367);
128             #endif
129 36           prime_precalc(LIM_SMALL);
130              
131 36           Newz(0, cache, 1, pc_cache_t); /* Allocate cache object, everything zero */
132 36           cache->last_n = n;
133              
134             /* Fill in small counts */
135             {
136 36           uint32_t const count_last_n = (n <= LIM_SMALL) ? n : LIM_SMALL;
137 36           uint32_t const count_last_idx = (count_last_n-1) >> 1;
138 36           uint32_t idx = 1;
139 36           uint16_t cnt = 1, *counts;
140 36           New(0, counts, count_last_idx+1, uint16_t);
141 36           counts[0] = 1;
142 29087 50         START_DO_FOR_EACH_PRIME(3, count_last_n) {
    50          
    100          
    50          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
143 122707 100         while (idx < ((p-1)>>1)) counts[idx++] = cnt;
144 29002           counts[idx++] = ++cnt;
145             } END_DO_FOR_EACH_PRIME
146 115 100         while (idx <= count_last_idx) counts[idx++] = cnt;
147              
148 36           cache->count = counts;
149 36           cache->last_count_n = count_last_n;
150             }
151              
152             /* Fill in bitmask and base counts */
153 36 50         if (n > cache->last_count_n) {
154             UV *mask;
155             uint32_t *count, i;
156 0           uint32_t words = (n / (2*BITS_PER_WORD)) + 1; /* 0-127=1, 128-255=2 */
157 0           Newz(0, count, words, uint32_t);
158 0           Newz(0, mask, words, UV);
159              
160 0           mask[0] = UVCONST(15) << (BITS_PER_WORD-4);
161             {
162             unsigned char* segment;
163             UV seg_base, seg_low, seg_high;
164 0           void* ctx = start_segment_primes(7, n, &segment);
165 0 0         while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
166 0 0         START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high )
    0          
    0          
    0          
    0          
167 0           UV m = (p-1)>>1;
168 0           uint32_t midx = m / BITS_PER_WORD;
169 0           uint32_t mrem = m % BITS_PER_WORD;
170 0           mask[midx] |= UVCONST(1) << (BITS_PER_WORD-1-mrem);
171 0           END_DO_FOR_EACH_SIEVE_PRIME
172             }
173 0           end_segment_primes(ctx);
174             }
175 0 0         for (i = 1; i < words; i++)
176 0           count[i] = count[i-1] + popcnt(mask[i-1]);
177              
178 0           cache->bm_mask = mask;
179 0           cache->bm_count = count;
180 0           cache->last_bmask_n = n;
181             }
182             verify_cache(cache);
183 36           return cache;
184             }
185              
186 6           void* prime_count_cache_create_with_primes(const uint32_t *primes, uint32_t lastidx) {
187             #if 0 /* Slower */
188             return prime_count_cache_create(primes[lastidx]);
189             #else /* Faster, but so much code duplication.... */
190             pc_cache_t *cache;
191             uint32_t i, n;
192              
193 6 50         MPUassert(primes != 0, "prime_count_cache_create called with null pointer");
194 6 50         if (lastidx <= 1) return prime_count_cache_create(5);
195 6 50         if (lastidx > 203280221) lastidx = 203280221;
196              
197 6           Newz(0, cache, 1, pc_cache_t); /* Allocate cache object, everything zero */
198 6           cache->last_n = n = primes[lastidx];
199              
200             /* Fill in small counts */
201             {
202 6           uint32_t const count_last_n = (n <= LIM_SMALL) ? n : LIM_SMALL;
203 6           uint32_t const count_last_idx = (count_last_n-1) >> 1;
204 6           uint32_t idx = 1;
205 6           uint16_t cnt = 1, *counts;
206 6           New(0, counts, count_last_idx+1, uint16_t);
207 6           counts[0] = 1;
208 5278 100         for (i = 2; i <= lastidx; i++) {
209 5272           uint32_t p = primes[i];
210 5272 50         if (p > count_last_n) break;
211 20803 100         while (idx < ((p-1)>>1)) counts[idx++] = cnt;
212 5272           counts[idx++] = ++cnt;
213             }
214 6 50         while (idx <= count_last_idx) counts[idx++] = cnt;
215              
216 6           cache->count = counts;
217 6           cache->last_count_n = count_last_n;
218             }
219              
220             /* Fill in bitmask and base counts */
221 6 50         if (n > cache->last_count_n) {
222             UV *mask;
223             uint32_t *count;
224 0           uint32_t words = (n / (2*BITS_PER_WORD)) + 1; /* 0-127=1, 128-255=2 */
225 0           Newz(0, count, words, uint32_t);
226 0           Newz(0, mask, words, UV);
227              
228 0           mask[0] = UVCONST(1) << (BITS_PER_WORD-1);
229 0 0         for (i = 2; i <= lastidx; i++) {
230 0           uint32_t p = primes[i];
231 0           uint32_t m = (p-1)>>1;
232 0           uint32_t midx = m / BITS_PER_WORD;
233 0           uint32_t mrem = m % BITS_PER_WORD;
234 0           mask[midx] |= UVCONST(1) << (BITS_PER_WORD-1-mrem);
235             }
236              
237 0 0         for (i = 1; i < words; i++)
238 0           count[i] = count[i-1] + popcnt(mask[i-1]);
239              
240 0           cache->bm_mask = mask;
241 0           cache->bm_count = count;
242 0           cache->last_bmask_n = n;
243             }
244             verify_cache(cache);
245 6           return cache;
246             #endif
247             }