File Coverage

lehmer.c

Criterion	Covered	Total	%
statement	116	121	95.8
branch	51	72	70.8
condition			n/a
subroutine			n/a
pod			n/a
total	167	193	86.5

line	stmt	bran	code
1			#include
2			#include
3			#include
4			#include
5
6			#define USE_PHI_CACHE 1
7			#define FUNC_isqrt 1
8			#include "lehmer.h"
9			#include "util.h"
10			#include "cache.h"
11			#include "sieve.h"
12			#include "prime_counts.h"
13			#include "prime_count_cache.h"
14			#include "legendre_phi.h"
15
16			/*****************************************************************************
17			*
18			* Counting primes with Legendre, Meissel, and Lehmer methods.
19			*
20			* Since we have a reasonable extended LMO, this is just for demonstration.
21			*
22			* The first versions of this were in 2012 and 2013, and included a novel
23			* phi calculation using iterative list merging, which greatly sped up
24			* the calculations compared to recursive phi calculations, even when caching
25			* was added.
26			*
27			* Kim Walisch started his primecount project in mid-2013, which quickly
28			* surpassed this in speed. Currently (2021) his project is substantially
29			* faster, as well as having support for the Deleglise-Rivat and
30			* Gourdon algorithms, efficient parallelization, and big number support.
31			*
32			* Reference: Hans Riesel, "Prime Numbers and Computer Methods for
33			* Factorization", 2nd edition, 1994.
34			*/
35
36			/* Below this size, just get primecount using standard methods */
37			#define SIEVE_LIMIT 60000000
38			/* Bigger prime count cache in Lehmer loop */
39			#define SIEVE_MULT 1
40
41			static int const verbose = 0;
42			#define STAGE_TIMING 0
43
44			#if STAGE_TIMING
45			#include
46			#define DECLARE_TIMING_VARIABLES struct timeval t0, t1;
47			#define TIMING_START gettimeofday(&t0, 0);
48			#define TIMING_END_PRINT(text) \
49			{ unsigned long long t; \
50			gettimeofday(&t1, 0); \
51			t = (t1.tv_sec-t0.tv_sec) * 1000000 + (t1.tv_usec - t0.tv_usec); \
52			printf("%s: %10.5f\n", text, ((double)t) / 1000000); }
53			#else
54			#define DECLARE_TIMING_VARIABLES
55			#define TIMING_START
56			#define TIMING_END_PRINT(text)
57			#endif
58
59
60	2		static UV P2_with_primes(UV n, UV a, UV b, const uint32_t *primes, uint32_t lastidx)
61			{
62			UV P2, lastw, lastwpc, i;
63	2		UV lastpc = 6 * primes[lastidx];
64	2		void* pcache = prime_count_cache_create(lastpc);
65
66			/* Ensure we have a large enough base sieve */
67	2		prime_precalc(isqrt(n / primes[a+1]));
68
69	2		P2 = lastw = lastwpc = 0;
70	1888	100	for (i = b; i > a; i--) {
71	1886		UV w = n / primes[i];
72	1610		lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
73	1886	100	: lastwpc + segment_prime_count(lastw+1, w);
74	1886		lastw = w;
75	1886		P2 += lastwpc;
76			}
77	2		P2 -= ((b+a-2) * (b-a+1) / 2) - a + 1;
78
79	2		prime_count_cache_destroy(pcache);
80	2		return P2;
81			}
82			/* b = prime_count(isqrt(n)) */
83	1		static UV P2(UV n, UV a, UV b)
84			{
85			uint32_t lastidx, *primes;
86			UV maxn, P2;
87
88	1		maxn = nth_prime_upper( b );
89	1	50	if (maxn > 4294967291U) maxn = 4294967291U;
90	1		lastidx = range_prime_sieve_32(&primes, maxn, 1);
91	1	50	MPUassert(lastidx >= b, "failed to generate enough primes\n");
92
93	1		P2 = P2_with_primes(n, a, b, primes, lastidx);
94
95	1		Safefree(primes);
96	1		return P2;
97			}
98
99
100			/* Legendre's method. Interesting and a good test for phi(x,a), but Lehmer's
101			* method is much faster (Legendre: a = pi(n^.5), Lehmer: a = pi(n^.25)) */
102	2		UV legendre_prime_count(UV n)
103			{
104			UV a, phina;
105	2	100	if (n < SIEVE_LIMIT)
106	1		return segment_prime_count(2, n);
107
108	1		a = legendre_prime_count(isqrt(n));
109	1		phina = legendre_phi(n, a);
110	1		return phina + a - 1;
111			}
112
113
114			/* Meissel's method. */
115	3		UV meissel_prime_count(UV n)
116			{
117			UV a, b, sum;
118	3	100	if (n < SIEVE_LIMIT)
119	2		return segment_prime_count(2, n);
120
121	1		a = meissel_prime_count(icbrt(n)); /* a = Pi(floor(n^1/3)) [max 192725] */
122	1		b = meissel_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
123
124	1		sum = legendre_phi(n, a) + a - 1 - P2(n, a, b);
125	1		return sum;
126			}
127
128			/* Lehmer's method. This is basically Riesel's Lehmer function (page 22),
129			* with some additional code to help optimize it. */
130	6		UV lehmer_prime_count(UV n)
131			{
132			UV z, a, b, c, sum, i, j, lastidx, lastpc, lastw, lastwpc;
133	6		uint32_t* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
134			void* pcache; /* Prime count cache */
135			DECLARE_TIMING_VARIABLES;
136
137	6	100	if (n < SIEVE_LIMIT)
138	5		return segment_prime_count(2, n);
139
140			/* Protect against overflow. 2^32-1 and 2^64-1 are both divisible by 3. */
141	1	50	if (n == UV_MAX) {
142	0	0	if ( (n%3) == 0 \|\| (n%5) == 0 \|\| (n%7) == 0 \|\| (n%31) == 0 )
		0
		0
		0
143	0		n--;
144			else
145	0		return segment_prime_count(2,n);
146			}
147
148	1	50	if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
149			TIMING_START;
150	1		z = isqrt(n);
151	1		a = lehmer_prime_count(isqrt(z)); /* a = Pi(floor(n^1/4)) [max 6542] */
152	1		b = lehmer_prime_count(z); /* b = Pi(floor(n^1/2)) [max 203280221] */
153	1		c = lehmer_prime_count(icbrt(n)); /* c = Pi(floor(n^1/3)) [max 192725] */
154			TIMING_END_PRINT("stage 1")
155
156	1	50	if (verbose > 0) printf("lehmer %lu stage 2: phi(x,a) (z=%lu a=%lu b=%lu c=%lu)\n", n, z, a, b, c);
157			TIMING_START;
158	1		sum = legendre_phi(n, a) + ((b+a-2) * (b-a+1) / 2);
159			TIMING_END_PRINT("phi(x,a)")
160
161			/* The first b primes are used in stage 4. Hence, primes to isqrt(n). */
162			TIMING_START;
163	1		lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
164	1	50	MPUassert(lastidx >= b, "failed to generate enough primes\n");
165			TIMING_END_PRINT("small primes")
166	1	50	if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastidx);
167
168			TIMING_START;
169	1		lastpc = SIEVE_MULT * primes[lastidx];
170			if (SIEVE_MULT == 1)
171	1		pcache = prime_count_cache_create_with_primes(primes, lastidx);
172			else
173			pcache = prime_count_cache_create(lastpc);
174			TIMING_END_PRINT("prime count cache")
175
176			TIMING_START;
177			/* Speed up all the prime counts by doing a big base sieve */
178	1		prime_precalc( (UV) pow(n, 3.0/5.0) );
179			/* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
180			/* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
181	1		prime_precalc(isqrt(n / primes[a+1]));
182			TIMING_END_PRINT("sieve precalc")
183
184	1	50	if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
185			TIMING_START;
186			/* Reverse the i loop so w increases. Count w in segments. */
187	1		lastw = 0;
188	1		lastwpc = 0;
189	999	100	for (i = b; i >= a+1; i--) {
190	998		UV w = n / primes[i];
191	0		lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
192	998	50	: lastwpc + segment_prime_count(lastw+1, w);
193	998		lastw = w;
194	998		sum = sum - lastwpc;
195	998	100	if (i <= c) {
196	55		UV bi = prime_count_cache_lookup(pcache, isqrt(w));
197	2893	100	for (j = i; j <= bi; j++) {
198	2838		sum = sum - prime_count_cache_lookup(pcache, w / primes[j]) + j - 1;
199			}
200			/* We could wrap the +j-1 in: sum += ((bi+1-i)(bi+i))/2 - (bi-i+1); /
201			}
202			}
203			TIMING_END_PRINT("stage 4")
204	1		prime_count_cache_destroy(pcache);
205	1		Safefree(primes);
206	1		return sum;
207			}
208
209
210			/* The Lagarias-Miller-Odlyzko method.
211			* Naive implementation without optimizations.
212			* About the same speed as Lehmer, a bit less memory.
213			* A better implementation can be 10-50x faster and much less memory.
214			*/
215	1		UV LMOS_prime_count(UV n)
216			{
217			UV n13, a, b, sum, i, j, k, c, lastidx, P2, S1, S2;
218			uint32_t primec;
219	1		uint32_t* primes = 0; /* small prime cache */
220	1		signed char* mu = 0; /* moebius to n^1/3 */
221	1		uint32_t* lpf = 0; /* least prime factor to n^1/3 */
222			void pcache; / Cache for recursive phi */
223			DECLARE_TIMING_VARIABLES;
224
225	1	50	if (n < SIEVE_LIMIT)
226	0		return segment_prime_count(2, n);
227
228	1		n13 = icbrt(n); /* n13 = floor(n^1/3) [max 2642245] */
229	1		a = lehmer_prime_count(n13); /* a = Pi(floor(n^1/3)) [max 192725] */
230	1		b = lehmer_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
231
232			TIMING_START;
233	1		lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
234	1	50	MPUassert(lastidx >= b, "failed to generate enough primes\n");
235			TIMING_END_PRINT("small primes")
236
237			TIMING_START;
238	1		New(0, mu, n13+1, signed char);
239	1		memset(mu, 1, sizeof(signed char) * (n13+1));
240	1	50	Newz(0, lpf, n13+1, uint32_t);
241	1		mu[0] = 0;
242	405	100	for (i = 1; i <= n13; i++) {
243	404		UV primei = primes[i];
244	1202	100	for (j = primei; j <= n13; j += primei) {
245	798		mu[j] = -mu[j];
246	798	100	if (lpf[j] == 0) lpf[j] = primei;
247			}
248	404		k = primei * primei;
249	580	100	for (j = k; j <= n13; j += k)
250	176		mu[j] = 0;
251			}
252	1		lpf[1] = UVCONST(4294967295); /* Set lpf[1] to max */
253
254			/* Remove mu[i] == 0 using lpf */
255	405	100	for (i = 1; i <= n13; i++)
256	404	100	if (mu[i] == 0)
257	158		lpf[i] = 0;
258			TIMING_END_PRINT("mu and lpf")
259
260			/* Thanks to Kim Walisch for help with the S1+S2 calculations. */
261	1	50	c = (a < tiny_phi_max_a()) ? a : tiny_phi_max_a();
262	1		primec = primes[c];
263	1		S1 = 0;
264	1		S2 = 0;
265	1		pcache = prepare_cached_legendre_phi(n, a);
266
267			TIMING_START;
268	405	100	for (i = 1; i <= n13; i++)
269	404	100	if (lpf[i] > primec)
270	76		S1 += mu[i] * tiny_phi(n/i, c);
271			TIMING_END_PRINT("S1")
272
273			/* TODO: This is about 1.5x slower than the old way. Fix. */
274			TIMING_START;
275	73	100	for (i = c; i+1 < a; i++) {
276	72		uint32_t p = primes[i+1];
277	28903	100	for (j = (n13/p)+1; j <= n13; j++)
278	28831	100	if (lpf[j] > p)
279	2626		S2 += -mu[j] * cached_legendre_phi(pcache, n / (j*p), i);
280			}
281			TIMING_END_PRINT("S2")
282	1		destroy_cached_legendre_phi(pcache);
283	1		Safefree(lpf);
284	1		Safefree(mu);
285
286			TIMING_START;
287	1		prime_precalc( (UV) pow(n, 2.9/5.0) );
288	1		P2 = P2_with_primes(n, a, b, primes, lastidx);
289			TIMING_END_PRINT("P2")
290	1		Safefree(primes);
291
292			/* printf("S1 = %lu\nS2 = %lu\na = %lu\nP2 = %lu\n", S1, S2, a, P2); */
293	1		sum = (S1 + S2) + a - 1 - P2;
294	1		return sum;
295			}