File Coverage

lehmer.c
Criterion Covered Total %
statement 116 121 95.8
branch 51 72 70.8
condition n/a
subroutine n/a
pod n/a
total 167 193 86.5


line stmt bran cond sub pod time code
1             #include
2             #include
3             #include
4             #include
5              
6             #define USE_PHI_CACHE 1
7             #define FUNC_isqrt 1
8             #include "lehmer.h"
9             #include "util.h"
10             #include "cache.h"
11             #include "sieve.h"
12             #include "prime_counts.h"
13             #include "prime_count_cache.h"
14             #include "legendre_phi.h"
15              
16             /*****************************************************************************
17             *
18             * Counting primes with Legendre, Meissel, and Lehmer methods.
19             *
20             * Since we have a reasonable extended LMO, this is just for demonstration.
21             *
22             * The first versions of this were in 2012 and 2013, and included a novel
23             * phi calculation using iterative list merging, which greatly sped up
24             * the calculations compared to recursive phi calculations, even when caching
25             * was added.
26             *
27             * Kim Walisch started his primecount project in mid-2013, which quickly
28             * surpassed this in speed. Currently (2021) his project is substantially
29             * faster, as well as having support for the Deleglise-Rivat and
30             * Gourdon algorithms, efficient parallelization, and big number support.
31             *
32             * Reference: Hans Riesel, "Prime Numbers and Computer Methods for
33             * Factorization", 2nd edition, 1994.
34             */
35              
36             /* Below this size, just get primecount using standard methods */
37             #define SIEVE_LIMIT 60000000
38             /* Bigger prime count cache in Lehmer loop */
39             #define SIEVE_MULT 1
40              
41             static int const verbose = 0;
42             #define STAGE_TIMING 0
43              
44             #if STAGE_TIMING
45             #include
46             #define DECLARE_TIMING_VARIABLES struct timeval t0, t1;
47             #define TIMING_START gettimeofday(&t0, 0);
48             #define TIMING_END_PRINT(text) \
49             { unsigned long long t; \
50             gettimeofday(&t1, 0); \
51             t = (t1.tv_sec-t0.tv_sec) * 1000000 + (t1.tv_usec - t0.tv_usec); \
52             printf("%s: %10.5f\n", text, ((double)t) / 1000000); }
53             #else
54             #define DECLARE_TIMING_VARIABLES
55             #define TIMING_START
56             #define TIMING_END_PRINT(text)
57             #endif
58              
59              
60 2           static UV P2_with_primes(UV n, UV a, UV b, const uint32_t *primes, uint32_t lastidx)
61             {
62             UV P2, lastw, lastwpc, i;
63 2           UV lastpc = 6 * primes[lastidx];
64 2           void* pcache = prime_count_cache_create(lastpc);
65              
66             /* Ensure we have a large enough base sieve */
67 2           prime_precalc(isqrt(n / primes[a+1]));
68              
69 2           P2 = lastw = lastwpc = 0;
70 1888 100         for (i = b; i > a; i--) {
71 1886           UV w = n / primes[i];
72 1610           lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
73 1886 100         : lastwpc + segment_prime_count(lastw+1, w);
74 1886           lastw = w;
75 1886           P2 += lastwpc;
76             }
77 2           P2 -= ((b+a-2) * (b-a+1) / 2) - a + 1;
78              
79 2           prime_count_cache_destroy(pcache);
80 2           return P2;
81             }
82             /* b = prime_count(isqrt(n)) */
83 1           static UV P2(UV n, UV a, UV b)
84             {
85             uint32_t lastidx, *primes;
86             UV maxn, P2;
87              
88 1           maxn = nth_prime_upper( b );
89 1 50         if (maxn > 4294967291U) maxn = 4294967291U;
90 1           lastidx = range_prime_sieve_32(&primes, maxn, 1);
91 1 50         MPUassert(lastidx >= b, "failed to generate enough primes\n");
92              
93 1           P2 = P2_with_primes(n, a, b, primes, lastidx);
94              
95 1           Safefree(primes);
96 1           return P2;
97             }
98              
99              
100             /* Legendre's method. Interesting and a good test for phi(x,a), but Lehmer's
101             * method is much faster (Legendre: a = pi(n^.5), Lehmer: a = pi(n^.25)) */
102 2           UV legendre_prime_count(UV n)
103             {
104             UV a, phina;
105 2 100         if (n < SIEVE_LIMIT)
106 1           return segment_prime_count(2, n);
107              
108 1           a = legendre_prime_count(isqrt(n));
109 1           phina = legendre_phi(n, a);
110 1           return phina + a - 1;
111             }
112              
113              
114             /* Meissel's method. */
115 3           UV meissel_prime_count(UV n)
116             {
117             UV a, b, sum;
118 3 100         if (n < SIEVE_LIMIT)
119 2           return segment_prime_count(2, n);
120              
121 1           a = meissel_prime_count(icbrt(n)); /* a = Pi(floor(n^1/3)) [max 192725] */
122 1           b = meissel_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
123              
124 1           sum = legendre_phi(n, a) + a - 1 - P2(n, a, b);
125 1           return sum;
126             }
127              
128             /* Lehmer's method. This is basically Riesel's Lehmer function (page 22),
129             * with some additional code to help optimize it. */
130 6           UV lehmer_prime_count(UV n)
131             {
132             UV z, a, b, c, sum, i, j, lastidx, lastpc, lastw, lastwpc;
133 6           uint32_t* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
134             void* pcache; /* Prime count cache */
135             DECLARE_TIMING_VARIABLES;
136              
137 6 100         if (n < SIEVE_LIMIT)
138 5           return segment_prime_count(2, n);
139              
140             /* Protect against overflow. 2^32-1 and 2^64-1 are both divisible by 3. */
141 1 50         if (n == UV_MAX) {
142 0 0         if ( (n%3) == 0 || (n%5) == 0 || (n%7) == 0 || (n%31) == 0 )
    0          
    0          
    0          
143 0           n--;
144             else
145 0           return segment_prime_count(2,n);
146             }
147              
148 1 50         if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
149             TIMING_START;
150 1           z = isqrt(n);
151 1           a = lehmer_prime_count(isqrt(z)); /* a = Pi(floor(n^1/4)) [max 6542] */
152 1           b = lehmer_prime_count(z); /* b = Pi(floor(n^1/2)) [max 203280221] */
153 1           c = lehmer_prime_count(icbrt(n)); /* c = Pi(floor(n^1/3)) [max 192725] */
154             TIMING_END_PRINT("stage 1")
155              
156 1 50         if (verbose > 0) printf("lehmer %lu stage 2: phi(x,a) (z=%lu a=%lu b=%lu c=%lu)\n", n, z, a, b, c);
157             TIMING_START;
158 1           sum = legendre_phi(n, a) + ((b+a-2) * (b-a+1) / 2);
159             TIMING_END_PRINT("phi(x,a)")
160              
161             /* The first b primes are used in stage 4. Hence, primes to isqrt(n). */
162             TIMING_START;
163 1           lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
164 1 50         MPUassert(lastidx >= b, "failed to generate enough primes\n");
165             TIMING_END_PRINT("small primes")
166 1 50         if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastidx);
167              
168             TIMING_START;
169 1           lastpc = SIEVE_MULT * primes[lastidx];
170             if (SIEVE_MULT == 1)
171 1           pcache = prime_count_cache_create_with_primes(primes, lastidx);
172             else
173             pcache = prime_count_cache_create(lastpc);
174             TIMING_END_PRINT("prime count cache")
175              
176             TIMING_START;
177             /* Speed up all the prime counts by doing a big base sieve */
178 1           prime_precalc( (UV) pow(n, 3.0/5.0) );
179             /* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
180             /* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
181 1           prime_precalc(isqrt(n / primes[a+1]));
182             TIMING_END_PRINT("sieve precalc")
183              
184 1 50         if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
185             TIMING_START;
186             /* Reverse the i loop so w increases. Count w in segments. */
187 1           lastw = 0;
188 1           lastwpc = 0;
189 999 100         for (i = b; i >= a+1; i--) {
190 998           UV w = n / primes[i];
191 0           lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
192 998 50         : lastwpc + segment_prime_count(lastw+1, w);
193 998           lastw = w;
194 998           sum = sum - lastwpc;
195 998 100         if (i <= c) {
196 55           UV bi = prime_count_cache_lookup(pcache, isqrt(w));
197 2893 100         for (j = i; j <= bi; j++) {
198 2838           sum = sum - prime_count_cache_lookup(pcache, w / primes[j]) + j - 1;
199             }
200             /* We could wrap the +j-1 in: sum += ((bi+1-i)*(bi+i))/2 - (bi-i+1); */
201             }
202             }
203             TIMING_END_PRINT("stage 4")
204 1           prime_count_cache_destroy(pcache);
205 1           Safefree(primes);
206 1           return sum;
207             }
208              
209              
210             /* The Lagarias-Miller-Odlyzko method.
211             * Naive implementation without optimizations.
212             * About the same speed as Lehmer, a bit less memory.
213             * A better implementation can be 10-50x faster and much less memory.
214             */
215 1           UV LMOS_prime_count(UV n)
216             {
217             UV n13, a, b, sum, i, j, k, c, lastidx, P2, S1, S2;
218             uint32_t primec;
219 1           uint32_t* primes = 0; /* small prime cache */
220 1           signed char* mu = 0; /* moebius to n^1/3 */
221 1           uint32_t* lpf = 0; /* least prime factor to n^1/3 */
222             void *pcache; /* Cache for recursive phi */
223             DECLARE_TIMING_VARIABLES;
224              
225 1 50         if (n < SIEVE_LIMIT)
226 0           return segment_prime_count(2, n);
227              
228 1           n13 = icbrt(n); /* n13 = floor(n^1/3) [max 2642245] */
229 1           a = lehmer_prime_count(n13); /* a = Pi(floor(n^1/3)) [max 192725] */
230 1           b = lehmer_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
231              
232             TIMING_START;
233 1           lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
234 1 50         MPUassert(lastidx >= b, "failed to generate enough primes\n");
235             TIMING_END_PRINT("small primes")
236              
237             TIMING_START;
238 1           New(0, mu, n13+1, signed char);
239 1           memset(mu, 1, sizeof(signed char) * (n13+1));
240 1 50         Newz(0, lpf, n13+1, uint32_t);
241 1           mu[0] = 0;
242 405 100         for (i = 1; i <= n13; i++) {
243 404           UV primei = primes[i];
244 1202 100         for (j = primei; j <= n13; j += primei) {
245 798           mu[j] = -mu[j];
246 798 100         if (lpf[j] == 0) lpf[j] = primei;
247             }
248 404           k = primei * primei;
249 580 100         for (j = k; j <= n13; j += k)
250 176           mu[j] = 0;
251             }
252 1           lpf[1] = UVCONST(4294967295); /* Set lpf[1] to max */
253              
254             /* Remove mu[i] == 0 using lpf */
255 405 100         for (i = 1; i <= n13; i++)
256 404 100         if (mu[i] == 0)
257 158           lpf[i] = 0;
258             TIMING_END_PRINT("mu and lpf")
259              
260             /* Thanks to Kim Walisch for help with the S1+S2 calculations. */
261 1 50         c = (a < tiny_phi_max_a()) ? a : tiny_phi_max_a();
262 1           primec = primes[c];
263 1           S1 = 0;
264 1           S2 = 0;
265 1           pcache = prepare_cached_legendre_phi(n, a);
266              
267             TIMING_START;
268 405 100         for (i = 1; i <= n13; i++)
269 404 100         if (lpf[i] > primec)
270 76           S1 += mu[i] * tiny_phi(n/i, c);
271             TIMING_END_PRINT("S1")
272              
273             /* TODO: This is about 1.5x slower than the old way. Fix. */
274             TIMING_START;
275 73 100         for (i = c; i+1 < a; i++) {
276 72           uint32_t p = primes[i+1];
277 28903 100         for (j = (n13/p)+1; j <= n13; j++)
278 28831 100         if (lpf[j] > p)
279 2626           S2 += -mu[j] * cached_legendre_phi(pcache, n / (j*p), i);
280             }
281             TIMING_END_PRINT("S2")
282 1           destroy_cached_legendre_phi(pcache);
283 1           Safefree(lpf);
284 1           Safefree(mu);
285              
286             TIMING_START;
287 1           prime_precalc( (UV) pow(n, 2.9/5.0) );
288 1           P2 = P2_with_primes(n, a, b, primes, lastidx);
289             TIMING_END_PRINT("P2")
290 1           Safefree(primes);
291              
292             /* printf("S1 = %lu\nS2 = %lu\na = %lu\nP2 = %lu\n", S1, S2, a, P2); */
293 1           sum = (S1 + S2) + a - 1 - P2;
294 1           return sum;
295             }