File Coverage

sieve.c
Criterion Covered Total %
statement 220 240 91.6
branch 311 394 78.9
condition n/a
subroutine n/a
pod n/a
total 531 634 83.7


line stmt bran cond sub pod time code
1             #include
2             #include
3             #include
4             #include
5              
6             #define FUNC_isqrt 1
7             #define FUNC_next_prime_in_sieve
8             #include "sieve.h"
9             #include "ptypes.h"
10             #include "cache.h"
11             #include "util.h"
12             #include "primality.h"
13             #include "montmath.h"
14             #include "prime_counts.h"
15              
16             /* Is it better to do a partial sieve + primality tests vs. full sieve? */
17 6322           static bool do_partial_sieve(UV startp, UV endp) {
18 6322           UV range = endp - startp;
19 6322           if (USE_MONTMATH) range /= 8; /* Fast primality tests */
20             #if BITS_PER_WORD == 64
21 6322 100         if ( (startp > UVCONST( 100000000000000) && range < 40000) ||
    50          
    50          
22 6311 0         (startp > UVCONST( 1000000000000000) && range < 150000) ||
    50          
23 6311 0         (startp > UVCONST( 10000000000000000) && range < 600000) ||
    50          
24 6311 0         (startp > UVCONST( 100000000000000000) && range < 2500000) ||
    50          
25 6311 0         (startp > UVCONST( 1000000000000000000) && range < 10000000) ||
    50          
26 0 0         (startp > UVCONST(10000000000000000000) && range < 40000000) )
27 11           return 1;
28             #endif
29 6311           return 0;
30             }
31              
32             /* 1001 bytes of presieved mod-30 bytes. If the area to be sieved is
33             * appropriately filled with this data, then 7, 11, and 13 do not have
34             * to be sieved. It wraps, so multiple memcpy's can be used. Do be
35             * aware that if you start at 0, you'll have to correct the first byte.
36             *
37             * mpu '$g=7*11*13; @b=(0)x$g; for $d (0..$g-1) { $i=0; for $m (1,7,11,13,17,19,23,29) { $n=30*$d+$m; if (gcd($n,$g) != 1) { $b[$d] |= (1<<$i); } $i++; } } for (0..$#b) { printf "0x%02x,",$b[$_]; print "\n" unless ($_+1)%13; } print "\n"'
38             */
39             #define PRESIEVE_SIZE (7*11*13)
40             static const unsigned char presieve13[PRESIEVE_SIZE] =
41             { 0x0e,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0x90,0xa1,0x0c,0x14,
42             0x58,0x02,0x61,0x11,0xc3,0x28,0x0c,0x44,0x22,0xa4,0x10,0x91,0x18,
43             0x4d,0x40,0x82,0x21,0x58,0xa1,0x28,0x04,0x42,0x92,0x20,0x51,0x91,
44             0x8a,0x04,0x48,0x03,0x60,0x34,0x81,0x1c,0x06,0xc1,0x02,0xa2,0x10,
45             0x89,0x08,0x24,0x45,0x42,0x30,0x10,0xc5,0x0a,0x86,0x40,0x0a,0x30,
46             0x38,0x85,0x08,0x15,0x40,0x63,0x20,0x96,0x83,0x88,0x04,0x60,0x16,
47             0x28,0x10,0x81,0x49,0x44,0xe2,0x02,0x2c,0x12,0xa1,0x0c,0x04,0x50,
48             0x0a,0x61,0x10,0x83,0x48,0x2c,0x40,0x26,0x26,0x90,0x91,0x08,0x55,
49             0x48,0x82,0x20,0x19,0xc1,0x28,0x04,0x44,0x12,0xa0,0x51,0x81,0x9a,
50             0x0c,0x48,0x02,0x21,0x54,0xa1,0x18,0x04,0x43,0x82,0xa2,0x10,0x99,
51             0x08,0x24,0x44,0x03,0x70,0x30,0xc1,0x0c,0x86,0xc0,0x0a,0x20,0x30,
52             0x8d,0x08,0x14,0x41,0x43,0x20,0x92,0x85,0x0a,0x84,0x60,0x06,0x30,
53             0x18,0x81,0x49,0x05,0xc2,0x22,0x28,0x14,0xa3,0x8c,0x04,0x50,0x12,
54             0x69,0x10,0x83,0x09,0x4c,0x60,0x22,0x24,0x12,0x91,0x08,0x45,0x50,
55             0x8a,0x20,0x18,0x81,0x68,0x24,0x40,0x16,0x22,0xd1,0x81,0x8a,0x14,
56             0x48,0x02,0x20,0x15,0xc1,0x38,0x04,0x45,0x02,0xa2,0x10,0x89,0x18,
57             0x2c,0x44,0x02,0x31,0x50,0xe1,0x08,0x86,0x42,0x8a,0x20,0x30,0x95,
58             0x08,0x14,0x40,0x43,0x60,0xb2,0x81,0x0c,0x06,0xe0,0x06,0x20,0x10,
59             0x89,0x49,0x04,0xc3,0x42,0x28,0x10,0xa5,0x0e,0x84,0x50,0x02,0x71,
60             0x18,0x83,0x08,0x0d,0x40,0x22,0x24,0x14,0x93,0x88,0x45,0x40,0x92,
61             0x28,0x18,0x81,0x29,0x44,0x60,0x12,0x24,0x53,0x81,0x8a,0x04,0x58,
62             0x0a,0x20,0x14,0x81,0x58,0x24,0x41,0x06,0xa2,0x90,0x89,0x08,0x34,
63             0x4c,0x02,0x30,0x11,0xc1,0x28,0x86,0x44,0x0a,0xa0,0x30,0x85,0x18,
64             0x1c,0x40,0x43,0x21,0xd2,0xa1,0x08,0x04,0x62,0x86,0x20,0x10,0x91,
65             0x49,0x04,0xc2,0x03,0x68,0x30,0xa1,0x0c,0x06,0xd0,0x02,0x61,0x10,
66             0x8b,0x08,0x0c,0x41,0x62,0x24,0x10,0x95,0x0a,0xc5,0x40,0x82,0x30,
67             0x18,0x81,0x28,0x05,0x40,0x32,0x20,0x55,0x83,0x8a,0x04,0x48,0x12,
68             0x28,0x14,0x81,0x19,0x44,0x61,0x02,0xa6,0x12,0x89,0x08,0x24,0x54,
69             0x0a,0x30,0x10,0xc1,0x48,0xa6,0x40,0x0e,0x22,0xb0,0x85,0x08,0x14,
70             0x48,0x43,0x20,0x93,0xc1,0x28,0x04,0x64,0x06,0xa0,0x10,0x81,0x59,
71             0x0c,0xc2,0x02,0x29,0x50,0xa1,0x0c,0x04,0x52,0x82,0x61,0x10,0x93,
72             0x08,0x0c,0x40,0x23,0x64,0x30,0x91,0x0c,0x47,0xc0,0x82,0x20,0x18,
73             0x89,0x28,0x04,0x41,0x52,0x20,0x51,0x85,0x8a,0x84,0x48,0x02,0x30,
74             0x1c,0x81,0x18,0x05,0x41,0x22,0xa2,0x14,0x8b,0x88,0x24,0x44,0x12,
75             0x38,0x10,0xc1,0x09,0xc6,0x60,0x0a,0x24,0x32,0x85,0x08,0x14,0x50,
76             0x4b,0x20,0x92,0x81,0x48,0x24,0x60,0x06,0x22,0x90,0x81,0x49,0x14,
77             0xca,0x02,0x28,0x11,0xe1,0x2c,0x04,0x54,0x02,0xe1,0x10,0x83,0x18,
78             0x0c,0x40,0x22,0x25,0x50,0xb1,0x08,0x45,0x42,0x82,0x20,0x18,0x91,
79             0x28,0x04,0x40,0x13,0x60,0x71,0x81,0x8e,0x06,0xc8,0x02,0x20,0x14,
80             0x89,0x18,0x04,0x41,0x42,0xa2,0x10,0x8d,0x0a,0xa4,0x44,0x02,0x30,
81             0x18,0xc1,0x08,0x87,0x40,0x2a,0x20,0x34,0x87,0x88,0x14,0x40,0x53,
82             0x28,0x92,0x81,0x09,0x44,0x60,0x06,0x24,0x12,0x81,0x49,0x04,0xd2,
83             0x0a,0x28,0x10,0xa1,0x4c,0x24,0x50,0x06,0x63,0x90,0x83,0x08,0x1c,
84             0x48,0x22,0x24,0x11,0xd1,0x28,0x45,0x44,0x82,0xa0,0x18,0x81,0x38,
85             0x0c,0x40,0x12,0x21,0x51,0xa1,0x8a,0x04,0x4a,0x82,0x20,0x14,0x91,
86             0x18,0x04,0x41,0x03,0xe2,0x30,0x89,0x0c,0x26,0xc4,0x02,0x30,0x10,
87             0xc9,0x08,0x86,0x41,0x4a,0x20,0x30,0x85,0x0a,0x94,0x40,0x43,0x30,
88             0x9a,0x81,0x08,0x05,0x60,0x26,0x20,0x14,0x83,0xc9,0x04,0xc2,0x12,
89             0x28,0x10,0xa1,0x0d,0x44,0x70,0x02,0x65,0x12,0x83,0x08,0x0c,0x50,
90             0x2a,0x24,0x10,0x91,0x48,0x65,0x40,0x86,0x22,0x98,0x81,0x28,0x14,
91             0x48,0x12,0x20,0x51,0xc1,0xaa,0x04,0x4c,0x02,0xa0,0x14,0x81,0x18,
92             0x0c,0x41,0x02,0xa3,0x50,0xa9,0x08,0x24,0x46,0x82,0x30,0x10,0xd1,
93             0x08,0x86,0x40,0x0b,0x60,0x30,0x85,0x0c,0x16,0xc0,0x43,0x20,0x92,
94             0x89,0x08,0x04,0x61,0x46,0x20,0x10,0x85,0x4b,0x84,0xc2,0x02,0x38,
95             0x18,0xa1,0x0c,0x05,0x50,0x22,0x61,0x14,0x83,0x88,0x0c,0x40,0x32,
96             0x2c,0x10,0x91,0x09,0x45,0x60,0x82,0x24,0x1a,0x81,0x28,0x04,0x50,
97             0x1a,0x20,0x51,0x81,0xca,0x24,0x48,0x06,0x22,0x94,0x81,0x18,0x14,
98             0x49,0x02,0xa2,0x11,0xc9,0x28,0x24,0x44,0x02,0xb0,0x10,0xc1,0x18,
99             0x8e,0x40,0x0a,0x21,0x70,0xa5,0x08,0x14,0x42,0xc3,0x20,0x92,0x91,
100             0x08,0x04,0x60,0x07,0x60,0x30,0x81,0x4d,0x06,0xc2,0x02,0x28,0x10,
101             0xa9,0x0c,0x04,0x51,0x42,0x61,0x10,0x87,0x0a,0x8c,0x40,0x22,0x34,
102             0x18,0x91,0x08,0x45,0x40,0xa2,0x20,0x1c,0x83,0xa8,0x04,0x40,0x12,
103             0x28,0x51,0x81,0x8b,0x44,0x68,0x02,0x24,0x16,0x81,0x18,0x04,0x51,
104             0x0a,0xa2,0x10,0x89,0x48,0x24,0x44,0x06,0x32,0x90,0xc1,0x08,0x96,
105             0x48,0x0a,0x20,0x31,0xc5,0x28,0x14,0x44,0x43,0xa0,0x92,0x81,0x18,
106             0x0c,0x60,0x06,0x21,0x50,0xa1,0x49,0x04,0xc2,0x82,0x28,0x10,0xb1,
107             0x0c,0x04,0x50,0x03,0x61,0x30,0x83,0x0c,0x0e,0xc0,0x22,0x24,0x10,
108             0x99,0x08,0x45,0x41,0xc2,0x20,0x18,0x85,0x2a,0x84,0x40,0x12,0x30,
109             0x59,0x81,0x8a,0x05,0x48,0x22,0x20,0x14,0x83,0x98,0x04,0x41,0x12,
110             0xaa,0x10,0x89,0x09,0x64,0x64,0x02,0x34,0x12,0xc1,0x08,0x86,0x50,
111             0x0a,0x20,0x30,0x85,0x48,0x34,0x40,0x47,0x22,0x92,0x81,0x08,0x14,
112             0x68,0x06,0x20,0x11,0xc1,0x69,0x04,0xc6,0x02,0xa8,0x10,0xa1,0x1c,
113             0x0c,0x50,0x02,0x61,0x50,0xa3,0x08,0x0c,0x42,0xa2,0x24,0x10,0x91,
114             0x08,0x45,0x40,0x83,0x60,0x38,0x81,0x2c,0x06,0xc0,0x12,0x20,0x51,
115             0x89,0x8a,0x04,0x49,0x42,0x20,0x14,0x85,0x1a,0x84,0x41,0x02,0xb2,
116             0x18,0x89,0x08,0x25,0x44,0x22,0x30,0x14,0xc3,0x88,0x86,0x40,0x1a,
117             0x28,0x30,0x85,0x09,0x54,0x60,0x43,0x24,0x92,0x81,0x08,0x04,0x70};
118              
119             static const UV max_sieve_prime = (BITS_PER_WORD==64) ? 4294967291U : 65521U;
120              
121             /* Tile bytes from source to bytes in dest */
122 5895           static void memtile(unsigned char* dst, const unsigned char* src, size_t from, size_t to) {
123 5895 100         if (to < from)
124 5252           from = to;
125 5895 50         if (dst != src)
126 0           memcpy(dst, src, from);
127 9383 100         while (from < to) {
128 3488 100         size_t bytes = (2*from > to) ? to-from : from;
129 3488           memcpy(dst+from, dst, bytes);
130 3488           from += bytes;
131             }
132 5895           }
133              
134 6793           static UV sieve_prefill(unsigned char* mem, UV startd, UV endd)
135             {
136 6793           UV vnext_prime = 17;
137 6793           UV nbytes = endd - startd + 1;
138 6793 50         MPUassert( (mem != 0) && (endd >= startd), "sieve_prefill bad arguments");
    50          
139              
140 6793 100         if (startd != 0) {
141 1422           UV pstartd = startd % PRESIEVE_SIZE;
142 1422           UV tailbytes = PRESIEVE_SIZE - pstartd;
143 1422 100         if (tailbytes > nbytes) tailbytes = nbytes;
144 1422           memcpy(mem, presieve13 + pstartd, tailbytes); /* Copy tail to mem */
145 1422           mem += tailbytes; /* Advance so mem points at the beginning */
146 1422           nbytes -= tailbytes;
147             }
148 6793 100         if (nbytes > 0) {
149 5895           memcpy(mem, presieve13, (nbytes < PRESIEVE_SIZE) ? nbytes : PRESIEVE_SIZE);
150 5895           memtile(mem, mem, PRESIEVE_SIZE, nbytes);
151 5895 100         if (startd == 0) mem[0] = 0x01; /* Correct first byte */
152             }
153             /* Leaving option open to tile 17 out and sieve, then return 19 */
154 6793           return vnext_prime;
155             }
156              
157             /* Marking primes is done the same way we used to do with tables, but
158             * now uses heavily unrolled code based on Kim Walisch's mod-30 sieve.
159             */
160             #define set_bit(s,n) *(s) |= (1 << n);
161             static const unsigned char masknum30[30] =
162             {0,0,0,0,0,0,0,1,0,0,0,2,0,3,0,0,0,4,0,5,0,0,0,6,0,0,0,0,0,7};
163             static const unsigned char qinit30[30] =
164             {0,0,1,1,1,1,1,1,2,2,2,2,3,3,4,4,4,4,5,5,6,6,6,6,7,7,7,7,7,7};
165              
166             typedef struct {
167             uint32_t prime;
168             UV offset;
169             uint8_t index;
170             } wheel_t;
171              
172             #define CROSS_INDEX(v, b0,b1,b2,b3,b4,b5,b6,b7, i0,i1,i2,i3,i4,i5,i6,i7, it) \
173             while (1) { \
174             case (v+0): if(s>=send){w->index=v+0;break;} set_bit(s,b0); s += r*6+i0; \
175             case (v+1): if(s>=send){w->index=v+1;break;} set_bit(s,b1); s += r*4+i1; \
176             case (v+2): if(s>=send){w->index=v+2;break;} set_bit(s,b2); s += r*2+i2; \
177             case (v+3): if(s>=send){w->index=v+3;break;} set_bit(s,b3); s += r*4+i3; \
178             case (v+4): if(s>=send){w->index=v+4;break;} set_bit(s,b4); s += r*2+i4; \
179             case (v+5): if(s>=send){w->index=v+5;break;} set_bit(s,b5); s += r*4+i5; \
180             case (v+6): if(s>=send){w->index=v+6;break;} set_bit(s,b6); s += r*6+i6; \
181             case (v+7): if(s>=send){w->index=v+7;break;} set_bit(s,b7); s += r*2+i7; \
182             while (s + r*28 + it-1 < send) { \
183             set_bit(s + r * 0 + 0, b0); \
184             set_bit(s + r * 6 + i0, b1); \
185             set_bit(s + r * 10 + i0+i1, b2); \
186             set_bit(s + r * 12 + i0+i1+i2, b3); \
187             set_bit(s + r * 16 + i0+i1+i2+i3, b4); \
188             set_bit(s + r * 18 + i0+i1+i2+i3+i4, b5); \
189             set_bit(s + r * 22 + i0+i1+i2+i3+i4+i5, b6); \
190             set_bit(s + r * 28 + i0+i1+i2+i3+i4+i5+i6, b7); \
191             s += r*30 + it; \
192             } \
193             }
194 4719629           static wheel_t create_wheel(UV startp, uint32_t prime)
195             {
196             wheel_t w;
197 4719629           UV q = prime;
198 4719629           UV p2 = q*q;
199              
200 4719629 100         if (startp == 0) {
201 27102           wheel_t ws = { prime, p2/30, qinit30[q % 30] + 8*masknum30[prime % 30] };
202 27102           return ws;
203             }
204              
205 4692527 100         if (p2 < startp) {
206 4684883           q = 1+(startp-1)/prime;
207 4684883           q += distancewheel30[q % 30];
208 4684883           p2 = prime * q;
209             /* The offset if p2 overflows is still ok, or set to max_sieve_prime+1. */
210             /* if (p2 < startp) p2 = max_sieve_prime+1; */
211             }
212              
213 4692527           w.offset = (p2-startp) / 30;
214 4692527           w.index = qinit30[q % 30] + 8*masknum30[prime % 30];
215 4692527           w.prime = prime;
216 4692527           return w;
217             }
218              
219 7331519           static void mark_primes(unsigned char* s, UV bytes, wheel_t* w)
220             {
221 7331519 100         if (w->offset >= bytes) {
222 4427081           w->offset -= bytes;
223             } else {
224 2904438           const unsigned char* send = s + bytes;
225 2904438           uint32_t r = w->prime / 30;
226 2904438           s += w->offset;
227 2904438           switch (w->index) {
228 5236818 100         CROSS_INDEX( 0, 0,1,2,3,4,5,6,7, 0,0,0,0,0,0,0,1, 1); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
229 5473921 100         CROSS_INDEX( 8, 1,5,4,0,7,3,2,6, 1,1,1,0,1,1,1,1, 7); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
230 5276528 100         CROSS_INDEX(16, 2,4,0,6,1,7,3,5, 2,2,0,2,0,2,2,1, 11); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
231 5149576 100         CROSS_INDEX(24, 3,0,6,5,2,1,7,4, 3,1,1,2,1,1,3,1, 13); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
232 6911570 100         CROSS_INDEX(32, 4,7,1,2,5,6,0,3, 3,3,1,2,1,3,3,1, 17); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
233 5974117 100         CROSS_INDEX(40, 5,3,7,1,6,0,4,2, 4,2,2,2,2,2,4,1, 19); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
234 6351016 100         CROSS_INDEX(48, 6,2,3,7,0,4,5,1, 5,3,1,4,1,3,5,1, 23); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
235 5795510 100         CROSS_INDEX(56, 7,6,5,4,3,2,1,0, 6,4,2,4,2,4,6,1, 29); break;
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
236             }
237 2904438           w->offset = s - send;
238             }
239 7331519           }
240              
241             /* Monolithic mod-30 wheel sieve */
242 195           unsigned char* sieve_erat30(UV end)
243             {
244             unsigned char *mem;
245             UV max_buf, limit, prime;
246              
247 195           max_buf = (end/30) + ((end%30) != 0);
248             /* Round up to a word */
249 195           max_buf = ((max_buf + sizeof(UV) - 1) / sizeof(UV)) * sizeof(UV);
250 195           New(0, mem, max_buf, unsigned char );
251              
252             /* Fill buffer with marked 7, 11, and 13 */
253 195           prime = sieve_prefill(mem, 0, max_buf-1);
254              
255 195           limit = isqrt(end); /* prime*prime can overflow */
256 17021 100         for ( ; prime <= limit; prime = next_prime_in_sieve(mem,prime,end)) {
257 16826           wheel_t w = create_wheel(0, prime);
258 16826           mark_primes(mem, max_buf, &w);
259             }
260 195           return mem;
261             }
262              
263 11           static void _primality_test_sieve(unsigned char* mem, UV startp, UV endp) {
264 7487 50         START_DO_FOR_EACH_SIEVE_PRIME(mem, 0, 0, endp-startp) {
    100          
    50          
    100          
    100          
265 6871 100         if (!BPSW(startp + p)) /* If the candidate is not prime, */
266 2789           mem[p/30] |= masktab30[p%30]; /* mark the sieve location. */
267 605           } END_DO_FOR_EACH_SIEVE_PRIME;
268 11           }
269 20           static void _sieve_range(unsigned char* mem, const unsigned char* sieve, UV startd, UV endd, UV limit) {
270 20           UV startp = 30*startd;
271 20           UV start_base_prime = sieve_prefill(mem, startd, endd);
272 73413 50         START_DO_FOR_EACH_SIEVE_PRIME(sieve, 0, start_base_prime, limit) { /* Sieve */
    100          
    100          
    100          
    100          
273 69902           wheel_t w = create_wheel(startp, p);
274 69902           mark_primes(mem, endd-startd+1, &w);
275 3431           } END_DO_FOR_EACH_SIEVE_PRIME;
276 20           }
277              
278 0           bool sieve_segment_partial(unsigned char* mem, UV startd, UV endd, UV depth)
279             {
280             const unsigned char* sieve;
281 0 0         UV startp = 30*startd, endp = (endd >= (UV_MAX/30)) ? UV_MAX-2 : 30*endd+29;
282 0           UV limit = isqrt(endp);
283 0 0         MPUassert(mem != 0 && endd >= startd && endp >= startp && depth >= 13,
    0          
    0          
    0          
284             "sieve_segment_partial bad arguments");
285             /* limit = min( sqrt(end), max-64-bit-prime, requested depth ) */
286 0 0         if (limit > max_sieve_prime) limit = max_sieve_prime;
287 0 0         if (limit > depth) limit = depth;
288 0           get_prime_cache(limit, &sieve); /* Get sieving primes */
289 0           _sieve_range(mem, sieve, startd, endd, limit);
290             release_prime_cache(sieve);
291 0           return 1;
292             }
293              
294             /* Segmented mod-30 wheel sieve */
295 163           bool sieve_segment(unsigned char* mem, UV startd, UV endd)
296             {
297             const unsigned char* sieve;
298 163 50         UV startp = 30*startd, endp = (endd >= (UV_MAX/30)) ? UV_MAX-2 : 30*endd+29;
299 163           UV sieve_size, limit = isqrt(endp);
300 163           int do_partial = do_partial_sieve(startp, endp);
301 163 50         MPUassert(mem != 0 && endd >= startd && endp >= startp,
    50          
    50          
302             "sieve_segment bad arguments");
303              
304 163           sieve_size = get_prime_cache(0, &sieve);
305              
306 163 100         if (sieve_size >= endp) {
307              
308             /* We can just use the primary cache */
309 143           memcpy(mem, sieve+startd, endd-startd+1);
310             release_prime_cache(sieve);
311              
312 20 50         } else if (!do_partial && sieve_size >= limit) {
    50          
313              
314             /* Full sieve and we have all sieving primes in hand */
315 20           _sieve_range(mem, sieve, startd, endd, limit);
316             release_prime_cache(sieve);
317              
318             } else {
319              
320             release_prime_cache(sieve);
321 0 0         if (do_partial)
322 0 0         limit >>= ((startp < (UV)1e16) ? 8 : 10);
323              
324             /* sieve_segment_partial(mem, startd, endd, limit); */
325 0           get_prime_cache(limit, &sieve);
326 0           _sieve_range(mem, sieve, startd, endd, limit);
327             release_prime_cache(sieve);
328              
329 0 0         if (do_partial)
330 0           _primality_test_sieve(mem, startp, endp);
331              
332             }
333 163           return 1;
334             }
335              
336 6578           bool sieve_segment_wheel(unsigned char* mem, UV startd, UV endd, wheel_t *warray, uint32_t wsize)
337             {
338 6578           uint32_t i = 0, limit, start_base_prime;
339 6578           uint32_t segsize = endd - startd + 1;
340 6578           UV startp = 30*startd;
341 6578 50         UV endp = (endd >= (UV_MAX/30)) ? UV_MAX-2 : 30*endd+29;
342 6578 50         MPUassert(mem != 0 && endd >= startd && endp >= startp,
    50          
    50          
343             "sieve_segment bad arguments");
344              
345             /* possibly use primary cache directly */
346              
347             /* Fill buffer with marked 7, 11, and 13 */
348 6578           start_base_prime = sieve_prefill(mem, startd, endd);
349 42143 100         while (i < wsize && warray[i].prime < start_base_prime)
    100          
350 35565           i++;
351              
352 6578           limit = isqrt(endp);
353 6578 50         if (limit > max_sieve_prime) limit = max_sieve_prime;
354              
355 7251369 100         while (i < wsize && warray[i].prime <= limit) {
    100          
356 7244791 100         if (warray[i].index >= 64)
357 4632901           warray[i] = create_wheel(startp, warray[i].prime);
358 7244791           mark_primes(mem, segsize, &(warray[i++]));
359             }
360              
361 6578 100         if (limit > warray[wsize-1].prime && warray[wsize-1].prime < max_sieve_prime)
    50          
362 11           _primality_test_sieve(mem, startp, endp);
363              
364 6578           return 1;
365             }
366              
367             /**************************************************************************/
368              
369             typedef struct {
370             UV lod;
371             UV hid;
372             UV low;
373             UV high;
374             UV endp;
375             UV segment_size;
376             unsigned char* segment;
377             unsigned char* base;
378             wheel_t *warray;
379             uint32_t wsize;
380             } segment_context_t;
381              
382             /*
383             * unsigned char* segment;
384             * UV seg_base, seg_low, seg_high;
385             * void* ctx = start_segment_primes(low, high, &segment);
386             * while (beg < 7) {
387             * beg = (beg <= 2) ? 2 : (beg <= 3) ? 3 : 5;
388             * .... with beg ....
389             * beg += 1 + (beg > 2);
390             * }
391             * while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
392             * START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_low - seg_base, seg_high - seg_base )
393             * .... with seg_base + p ....
394             * END_DO_FOR_EACH_SIEVE_PRIME
395             * }
396             * end_segment_primes(ctx);
397             */
398              
399 6159           void* start_segment_primes(UV low, UV high, unsigned char** segmentmem)
400             {
401             segment_context_t* ctx;
402             UV nsegments, range;
403              
404 6159 50         MPUassert( high >= low, "start_segment_primes bad arguments");
405 6159           New(0, ctx, 1, segment_context_t);
406 6159           ctx->low = low;
407 6159           ctx->high = high;
408 6159           ctx->lod = low / 30;
409 6159           ctx->hid = high / 30;
410 6159 50         ctx->endp = (ctx->hid >= (UV_MAX/30)) ? UV_MAX-2 : 30*ctx->hid+29;
411 6159           range = ctx->hid - ctx->lod + 1; /* range in bytes */
412              
413             #if BITS_PER_WORD == 64
414 6159 100         if (high > 1e10 && range > 32*1024-16) {
    100          
415             UV size, div;
416             /* Use larger segments */
417 3           size = isqrt(32*isqrt(high)) * (logint(high,2)-2);
418 3 50         if (size < 128*1024) size = 128*1024;
419             /* Evenly split the range into segments */
420 3           div = (range+size-1)/size;
421 3 50         size = (div <= 1) ? range : (range+div-1)/div;
422 3           ctx->segment_size = size;
423 3           New(0, ctx->segment, size, unsigned char);
424             } else
425             #endif
426 6156           ctx->segment = get_prime_segment( &(ctx->segment_size) );
427 6159           *segmentmem = ctx->segment;
428 6159           nsegments = (((high-low+29)/30)+ctx->segment_size-1) / ctx->segment_size;
429              
430 6159 50         MPUverbose(3, "segment sieve: byte range %lu split into %lu segments of size %lu\n", (unsigned long)range, (unsigned long)nsegments, (unsigned long)ctx->segment_size);
431              
432 6159           ctx->base = 0;
433 6159           ctx->warray = 0;
434 6159           ctx->wsize = 0;
435             #if 1
436             { /* Generate wheel data for this segment sieve */
437 6159           const UV maxsieve = UVCONST(400000000);
438             UV limit, nprimes;
439             wheel_t *warray;
440 6159           wheel_t w = {0,0,128};
441 6159           uint32_t wsize = 0;
442             /* Number of primes for a full sieve */
443 6159           limit = isqrt(ctx->endp);
444             /* For small ranges a partial sieve is much faster */
445 6159 100         if (do_partial_sieve(low, high))
446 11 100         limit >>= ((low < (UV)1e16) ? 8 : 10);
447 6159 50         if (limit <= maxsieve) {
448             /* Bump to one more than needed. */
449 6159           limit = next_prime(limit);
450             /* We'll make space for this many */
451 6159           nprimes = max_nprimes(limit);
452 6159 50         MPUverbose(4, "segment sieve %lu - %lu, primes to %lu (max %lu)\n", (unsigned long)low, (unsigned long)high, (unsigned long)limit, (unsigned long)nprimes);
453 6159 50         New(0, warray, nprimes, wheel_t);
454 4839134 50         START_DO_FOR_EACH_PRIME(0,limit) {
    0          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
455 4668635 50         if (wsize >= nprimes) croak("segment bad upper count");
456 4668635           w.prime = p;
457 4668635           warray[wsize++] = w;
458             } END_DO_FOR_EACH_PRIME;
459 6159           ctx->warray = warray;
460 6159           ctx->wsize = wsize;
461             }
462             }
463             #endif
464              
465 6159           return (void*) ctx;
466             }
467              
468 12664           bool next_segment_primes(void* vctx, UV* base, UV* low, UV* high)
469             {
470             UV seghigh_d, range_d;
471 12664           segment_context_t* ctx = (segment_context_t*) vctx;
472              
473 12664 100         if (ctx->lod > ctx->hid) return 0;
474              
475 13156           seghigh_d = ((ctx->hid - ctx->lod) < ctx->segment_size)
476             ? ctx->hid
477 6578 100         : (ctx->lod + ctx->segment_size - 1);
478 6578           range_d = seghigh_d - ctx->lod + 1;
479 6578           *low = ctx->low;
480 6578 100         *high = (seghigh_d == ctx->hid) ? ctx->high : (seghigh_d*30 + 29);
481 6578           *base = ctx->lod * 30;
482              
483 6578 50         MPUassert( seghigh_d >= ctx->lod, "next_segment_primes: highd < lowd");
484 6578 50         MPUassert( range_d <= ctx->segment_size, "next_segment_primes: range > segment size");
485              
486 6578 50         if (ctx->warray != 0)
487 6578           sieve_segment_wheel(ctx->segment, ctx->lod, seghigh_d, ctx->warray, ctx->wsize);
488             else
489 0           sieve_segment(ctx->segment, ctx->lod, seghigh_d);
490              
491 6578           ctx->lod += range_d;
492 6578           ctx->low = *high + 2;
493              
494 6578           return 1;
495             }
496              
497 6159           void end_segment_primes(void* vctx)
498             {
499 6159           segment_context_t* ctx = (segment_context_t*) vctx;
500 6159 50         MPUassert(ctx != 0, "end_segment_primes given a null pointer");
501 6159 50         if (ctx->segment != 0) {
502 6159           release_prime_segment(ctx->segment);
503 6159           ctx->segment = 0;
504             }
505 6159 50         if (ctx->base != 0) {
506 0           Safefree(ctx->base);
507 0           ctx->base = 0;
508             }
509 6159 50         if (ctx->warray != 0) {
510 6159           Safefree(ctx->warray);
511 6159           ctx->warray = 0;
512             }
513 6159           Safefree(ctx);
514 6159           }
515              
516 69           UV range_prime_sieve(UV**list, UV lo, UV hi)
517             {
518 69           UV *P, Psize, i = 0;
519 69 50         if (hi < lo) { *list = 0; return 0; }
520 69           Psize = prime_count_upper(hi) - prime_count_lower(lo) + 1;
521 69 50         New(0, P, Psize, UV);
522 69 100         if (lo <= 2 && hi >= 2) P[i++] = 2;
    50          
523 69 100         if (lo <= 3 && hi >= 3) P[i++] = 3;
    50          
524 69 100         if (lo <= 5 && hi >= 5) P[i++] = 5;
    100          
525             {
526             unsigned char* segment;
527             UV seg_base, seg_low, seg_high;
528 69           void* ctx = start_segment_primes(lo, hi, &segment);
529 304 100         while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
530 11229372 50         START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high )
    100          
    100          
    100          
    100          
531 10518946           P[i++] = p;
532 710170           END_DO_FOR_EACH_SIEVE_PRIME
533             }
534 69           end_segment_primes(ctx);
535             }
536 69           *list = P;
537 69           return i;
538             }
539              
540 1670           uint32_t range_prime_sieve_32(uint32_t** list, uint32_t n, uint32_t offset)
541             {
542 1670           uint32_t *P, i = offset;
543              
544 1670 50         if (n < 2) { *list = 0; return 0; }
545 1670 50         New(0, P, max_nprimes(n) + offset + 3, uint32_t); /* Allocate list */
546 1670 100         if (offset > 0) memset(P, 0, offset * sizeof(uint32_t)); /* Zero to offset */
547 1670           P[i++] = 2; P[i++] = 3; P[i++] = 5; /* Fill in 2/3/5 */
548              
549 1670 100         if (n >= 7) {
550             unsigned char* segment;
551             UV seg_base, seg_low, seg_high;
552 1542           void* ctx = start_segment_primes(7, n, &segment);
553 3084 100         while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) {
554 244056 50         START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high )
    100          
    50          
    100          
    100          
555 235022           P[i++] = p;
556 7492           END_DO_FOR_EACH_SIEVE_PRIME
557             }
558 1542           end_segment_primes(ctx);
559             }
560 1770 100         while (P[i-1] > n) i--; /* Truncate the count if necesssary. */
561 1670           *list = P;
562 1670           return i-offset; /* Returns number of primes, excluding offset */
563             }