| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #include | 
| 2 |  |  |  |  |  |  | #include | 
| 3 |  |  |  |  |  |  | #include | 
| 4 |  |  |  |  |  |  | #include | 
| 5 |  |  |  |  |  |  | #include | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | /* Use long double to get a little more precision when we're calculating the | 
| 8 |  |  |  |  |  |  | * math functions -- especially those calculated with a series.  Long double | 
| 9 |  |  |  |  |  |  | * is defined in C89 (ISO C).  Note that 'long double' on many platforms is | 
| 10 |  |  |  |  |  |  | * identical to 'double so it may buy us nothing.  But it's worth trying. | 
| 11 |  |  |  |  |  |  | * | 
| 12 |  |  |  |  |  |  | * While the type was in C89, math functions using it are in C99.  Some | 
| 13 |  |  |  |  |  |  | * systems didn't really get it right (e.g. NetBSD which left out some | 
| 14 |  |  |  |  |  |  | * functions for 13 years). | 
| 15 |  |  |  |  |  |  | */ | 
| 16 |  |  |  |  |  |  | #include | 
| 17 |  |  |  |  |  |  | #if _MSC_VER || defined(__IBMC__) | defined(__IBMCPP__) || (defined(__STDC_VERSION__) && __STDC_VERSION >= 199901L) | 
| 18 |  |  |  |  |  |  | /* math.h should give us these as functions or macros. | 
| 19 |  |  |  |  |  |  | * | 
| 20 |  |  |  |  |  |  | *  extern long double fabsl(long double); | 
| 21 |  |  |  |  |  |  | *  extern long double floorl(long double); | 
| 22 |  |  |  |  |  |  | *  extern long double ceill(long double); | 
| 23 |  |  |  |  |  |  | *  extern long double sqrtl(long double); | 
| 24 |  |  |  |  |  |  | *  extern long double powl(long double, long double); | 
| 25 |  |  |  |  |  |  | *  extern long double expl(long double); | 
| 26 |  |  |  |  |  |  | *  extern long double logl(long double); | 
| 27 |  |  |  |  |  |  | */ | 
| 28 |  |  |  |  |  |  | #else | 
| 29 |  |  |  |  |  |  | #define fabsl(x)    (long double) fabs( (double) (x) ) | 
| 30 |  |  |  |  |  |  | #define floorl(x)   (long double) floor( (double) (x) ) | 
| 31 |  |  |  |  |  |  | #define ceill(x)    (long double) ceil( (double) (x) ) | 
| 32 |  |  |  |  |  |  | #define sqrtl(x)    (long double) sqrt( (double) (x) ) | 
| 33 |  |  |  |  |  |  | #define powl(x, y)  (long double) pow( (double) (x), (double) (y) ) | 
| 34 |  |  |  |  |  |  | #define expl(x)     (long double) exp( (double) (x) ) | 
| 35 |  |  |  |  |  |  | #define logl(x)     (long double) log( (double) (x) ) | 
| 36 |  |  |  |  |  |  | #endif | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | #ifdef LDBL_INFINITY | 
| 39 |  |  |  |  |  |  | #undef INFINITY | 
| 40 |  |  |  |  |  |  | #define INFINITY LDBL_INFINITY | 
| 41 |  |  |  |  |  |  | #elif !defined(INFINITY) | 
| 42 |  |  |  |  |  |  | #define INFINITY (DBL_MAX + DBL_MAX) | 
| 43 |  |  |  |  |  |  | #endif | 
| 44 |  |  |  |  |  |  | #ifndef LDBL_EPSILON | 
| 45 |  |  |  |  |  |  | #define LDBL_EPSILON 1e-16 | 
| 46 |  |  |  |  |  |  | #endif | 
| 47 |  |  |  |  |  |  | #ifndef LDBL_MAX | 
| 48 |  |  |  |  |  |  | #define LDBL_MAX DBL_MAX | 
| 49 |  |  |  |  |  |  | #endif | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | #define KAHAN_INIT(s) \ | 
| 52 |  |  |  |  |  |  | long double s ## _y, s ## _t; \ | 
| 53 |  |  |  |  |  |  | long double s ## _c = 0.0; \ | 
| 54 |  |  |  |  |  |  | long double s = 0.0; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | #define KAHAN_SUM(s, term) \ | 
| 57 |  |  |  |  |  |  | do { \ | 
| 58 |  |  |  |  |  |  | s ## _y = (term) - s ## _c; \ | 
| 59 |  |  |  |  |  |  | s ## _t = s + s ## _y; \ | 
| 60 |  |  |  |  |  |  | s ## _c = (s ## _t - s) - s ## _y; \ | 
| 61 |  |  |  |  |  |  | s = s ## _t; \ | 
| 62 |  |  |  |  |  |  | } while (0) | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | #include "ptypes.h" | 
| 66 |  |  |  |  |  |  | #define FUNC_isqrt 1 | 
| 67 |  |  |  |  |  |  | #define FUNC_icbrt 1 | 
| 68 |  |  |  |  |  |  | #define FUNC_lcm_ui 1 | 
| 69 |  |  |  |  |  |  | #define FUNC_ctz 1 | 
| 70 |  |  |  |  |  |  | #define FUNC_log2floor 1 | 
| 71 |  |  |  |  |  |  | #define FUNC_is_perfect_square | 
| 72 |  |  |  |  |  |  | #define FUNC_is_perfect_cube | 
| 73 |  |  |  |  |  |  | #define FUNC_is_perfect_fifth | 
| 74 |  |  |  |  |  |  | #define FUNC_is_perfect_seventh | 
| 75 |  |  |  |  |  |  | #define FUNC_next_prime_in_sieve 1 | 
| 76 |  |  |  |  |  |  | #define FUNC_prev_prime_in_sieve 1 | 
| 77 |  |  |  |  |  |  | #define FUNC_ipow 1 | 
| 78 |  |  |  |  |  |  | #include "util.h" | 
| 79 |  |  |  |  |  |  | #include "sieve.h" | 
| 80 |  |  |  |  |  |  | #include "primality.h" | 
| 81 |  |  |  |  |  |  | #include "cache.h" | 
| 82 |  |  |  |  |  |  | #include "lmo.h" | 
| 83 |  |  |  |  |  |  | #include "prime_nth_count.h" | 
| 84 |  |  |  |  |  |  | #include "factor.h" | 
| 85 |  |  |  |  |  |  | #include "mulmod.h" | 
| 86 |  |  |  |  |  |  | #include "constants.h" | 
| 87 |  |  |  |  |  |  | #include "montmath.h" | 
| 88 |  |  |  |  |  |  | #include "csprng.h" | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | static int _verbose = 0; | 
| 91 | 9 |  |  |  |  |  | void _XS_set_verbose(int v) { _verbose = v; } | 
| 92 | 8616 |  |  |  |  |  | int _XS_get_verbose(void) { return _verbose; } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | static int _call_gmp = 0; | 
| 95 | 71 |  |  |  |  |  | void _XS_set_callgmp(int v) { _call_gmp = v; } | 
| 96 | 19406 |  |  |  |  |  | int  _XS_get_callgmp(void) { return _call_gmp; } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | static int _secure = 0; | 
| 99 | 0 |  |  |  |  |  | void _XS_set_secure(void) { _secure = 1; } | 
| 100 | 22 |  |  |  |  |  | int  _XS_get_secure(void) { return _secure; } | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | /* We'll use this little static sieve to quickly answer small values of | 
| 103 |  |  |  |  |  |  | *   is_prime, next_prime, prev_prime, prime_count | 
| 104 |  |  |  |  |  |  | * for non-threaded Perl it's basically the same as getting the primary | 
| 105 |  |  |  |  |  |  | * cache.  It guarantees we'll have an answer with no waiting on any version. | 
| 106 |  |  |  |  |  |  | */ | 
| 107 |  |  |  |  |  |  | static const unsigned char prime_sieve30[] = | 
| 108 |  |  |  |  |  |  | {0x01,0x20,0x10,0x81,0x49,0x24,0xc2,0x06,0x2a,0xb0,0xe1,0x0c,0x15,0x59,0x12, | 
| 109 |  |  |  |  |  |  | 0x61,0x19,0xf3,0x2c,0x2c,0xc4,0x22,0xa6,0x5a,0x95,0x98,0x6d,0x42,0x87,0xe1, | 
| 110 |  |  |  |  |  |  | 0x59,0xa9,0xa9,0x1c,0x52,0xd2,0x21,0xd5,0xb3,0xaa,0x26,0x5c,0x0f,0x60,0xfc, | 
| 111 |  |  |  |  |  |  | 0xab,0x5e,0x07,0xd1,0x02,0xbb,0x16,0x99,0x09,0xec,0xc5,0x47,0xb3,0xd4,0xc5, | 
| 112 |  |  |  |  |  |  | 0xba,0xee,0x40,0xab,0x73,0x3e,0x85,0x4c,0x37,0x43,0x73,0xb0,0xde,0xa7,0x8e, | 
| 113 |  |  |  |  |  |  | 0x8e,0x64,0x3e,0xe8,0x10,0xab,0x69,0xe5,0xf7,0x1a,0x7c,0x73,0xb9,0x8d,0x04, | 
| 114 |  |  |  |  |  |  | 0x51,0x9a,0x6d,0x70,0xa7,0x78,0x2d,0x6d,0x27,0x7e,0x9a,0xd9,0x1c,0x5f,0xee, | 
| 115 |  |  |  |  |  |  | 0xc7,0x38,0xd9,0xc3,0x7e,0x14,0x66,0x72,0xae,0x77,0xc1,0xdb,0x0c,0xcc,0xb2, | 
| 116 |  |  |  |  |  |  | 0xa5,0x74,0xe3,0x58,0xd5,0x4b,0xa7,0xb3,0xb1,0xd9,0x09,0xe6,0x7d,0x23,0x7c, | 
| 117 |  |  |  |  |  |  | 0x3c,0xd3,0x0e,0xc7,0xfd,0x4a,0x32,0x32,0xfd,0x4d,0xb5,0x6b,0xf3,0xa8,0xb3, | 
| 118 |  |  |  |  |  |  | 0x85,0xcf,0xbc,0xf4,0x0e,0x34,0xbb,0x93,0xdb,0x07,0xe6,0xfe,0x6a,0x57,0xa3, | 
| 119 |  |  |  |  |  |  | 0x8c,0x15,0x72,0xdb,0x69,0xd4,0xaf,0x59,0xdd,0xe1,0x3b,0x2e,0xb7,0xf9,0x2b, | 
| 120 |  |  |  |  |  |  | 0xc5,0xd0,0x8b,0x63,0xf8,0x95,0xfa,0x77,0x40,0x97,0xea,0xd1,0x9f,0xaa,0x1c, | 
| 121 |  |  |  |  |  |  | 0x48,0xae,0x67,0xf7,0xeb,0x79,0xa5,0x55,0xba,0xb2,0xb6,0x8f,0xd8,0x2d,0x6c, | 
| 122 |  |  |  |  |  |  | 0x2a,0x35,0x54,0xfd,0x7c,0x9e,0xfa,0xdb,0x31,0x78,0xdd,0x3d,0x56,0x52,0xe7, | 
| 123 |  |  |  |  |  |  | 0x73,0xb2,0x87,0x2e,0x76,0xe9,0x4f,0xa8,0x38,0x9d,0x5d,0x3f,0xcb,0xdb,0xad, | 
| 124 |  |  |  |  |  |  | 0x51,0xa5,0xbf,0xcd,0x72,0xde,0xf7,0xbc,0xcb,0x49,0x2d,0x49,0x26,0xe6,0x1e, | 
| 125 |  |  |  |  |  |  | 0x9f,0x98,0xe5,0xc6,0x9f,0x2f,0xbb,0x85,0x6b,0x65,0xf6,0x77,0x7c,0x57,0x8b, | 
| 126 |  |  |  |  |  |  | 0xaa,0xef,0xd8,0x5e,0xa2,0x97,0xe1,0xdc,0x37,0xcd,0x1f,0xe6,0xfc,0xbb,0x8c, | 
| 127 |  |  |  |  |  |  | 0xb7,0x4e,0xc7,0x3c,0x19,0xd5,0xa8,0x9e,0x67,0x4a,0xe3,0xf5,0x97,0x3a,0x7e, | 
| 128 |  |  |  |  |  |  | 0x70,0x53,0xfd,0xd6,0xe5,0xb8,0x1c,0x6b,0xee,0xb1,0x9b,0xd1,0xeb,0x34,0xc2, | 
| 129 |  |  |  |  |  |  | 0x23,0xeb,0x3a,0xf9,0xef,0x16,0xd6,0x4e,0x7d,0x16,0xcf,0xb8,0x1c,0xcb,0xe6, | 
| 130 |  |  |  |  |  |  | 0x3c,0xda,0xf5,0xcf}; | 
| 131 |  |  |  |  |  |  | #define NPRIME_SIEVE30 (sizeof(prime_sieve30)/sizeof(prime_sieve30[0])) | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | /* Return of 2 if n is prime, 0 if not.  Do it fast. */ | 
| 134 | 443424 |  |  |  |  |  | int is_prime(UV n) | 
| 135 |  |  |  |  |  |  | { | 
| 136 | 443424 | 100 |  |  |  |  | if (n <= 10) | 
| 137 | 12776 | 100 |  |  |  |  | return (n == 2 || n == 3 || n == 5 || n == 7) ? 2 : 0; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 430648 | 100 |  |  |  |  | if (n < UVCONST(200000000)) { | 
| 140 | 428708 |  |  |  |  |  | UV d = n/30; | 
| 141 | 428708 |  |  |  |  |  | UV m = n - d*30; | 
| 142 | 428708 |  |  |  |  |  | unsigned char mtab = masktab30[m];  /* Bitmask in mod30 wheel */ | 
| 143 |  |  |  |  |  |  | const unsigned char* sieve; | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | /* Return 0 if a multiple of 2, 3, or 5 */ | 
| 146 | 428708 | 100 |  |  |  |  | if (mtab == 0) | 
| 147 | 416832 |  |  |  |  |  | return 0; | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | /* Check static tiny sieve */ | 
| 150 | 188294 | 100 |  |  |  |  | if (d < NPRIME_SIEVE30) | 
| 151 | 18167 | 100 |  |  |  |  | return (prime_sieve30[d] & mtab) ? 0 : 2; | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 170127 | 100 |  |  |  |  | if (!(n%7) || !(n%11) || !(n%13)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | /* Check primary cache */ | 
| 156 | 117105 | 100 |  |  |  |  | if (n <= get_prime_cache(0,0)) { | 
| 157 | 105229 |  |  |  |  |  | int isprime = -1; | 
| 158 | 105229 | 50 |  |  |  |  | if (n <= get_prime_cache(0, &sieve)) | 
| 159 | 105229 | 100 |  |  |  |  | isprime = 2*((sieve[d] & mtab) == 0); | 
| 160 |  |  |  |  |  |  | release_prime_cache(sieve); | 
| 161 | 105229 | 50 |  |  |  |  | if (isprime >= 0) | 
| 162 | 117105 |  |  |  |  |  | return isprime; | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  | } | 
| 165 | 13816 |  |  |  |  |  | return is_prob_prime(n); | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 22483 |  |  |  |  |  | UV next_prime(UV n) | 
| 170 |  |  |  |  |  |  | { | 
| 171 |  |  |  |  |  |  | UV m, next; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 22483 | 100 |  |  |  |  | if (n < 30*NPRIME_SIEVE30) { | 
| 174 | 16226 |  |  |  |  |  | next = next_prime_in_sieve(prime_sieve30, n, 30*NPRIME_SIEVE30); | 
| 175 | 16226 | 100 |  |  |  |  | if (next != 0) return next; | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 6258 | 50 |  |  |  |  | if (n >= MPU_MAX_PRIME) return 0; /* Overflow */ | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 6258 | 100 |  |  |  |  | if (n < get_prime_cache(0,0)) { | 
| 181 |  |  |  |  |  |  | const unsigned char* sieve; | 
| 182 | 4055 |  |  |  |  |  | UV sieve_size = get_prime_cache(0, &sieve); | 
| 183 | 4055 | 50 |  |  |  |  | next = (n < sieve_size)  ?  next_prime_in_sieve(sieve, n, sieve_size)  :  0; | 
| 184 |  |  |  |  |  |  | release_prime_cache(sieve); | 
| 185 | 4055 | 50 |  |  |  |  | if (next != 0) return next; | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 2203 |  |  |  |  |  | m = n % 30; | 
| 189 |  |  |  |  |  |  | do { /* Move forward one. */ | 
| 190 | 9727 |  |  |  |  |  | n += wheeladvance30[m]; | 
| 191 | 9727 |  |  |  |  |  | m = nextwheel30[m]; | 
| 192 | 9727 | 100 |  |  |  |  | } while (!is_prob_prime(n)); | 
| 193 | 2203 |  |  |  |  |  | return n; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 25776 |  |  |  |  |  | UV prev_prime(UV n) | 
| 198 |  |  |  |  |  |  | { | 
| 199 |  |  |  |  |  |  | UV m, prev; | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 25776 | 100 |  |  |  |  | if (n < 30*NPRIME_SIEVE30) | 
| 202 | 17606 |  |  |  |  |  | return prev_prime_in_sieve(prime_sieve30, n); | 
| 203 |  |  |  |  |  |  |  | 
| 204 | 8170 | 100 |  |  |  |  | if (n < get_prime_cache(0,0)) { | 
| 205 |  |  |  |  |  |  | const unsigned char* sieve; | 
| 206 | 4007 |  |  |  |  |  | UV sieve_size = get_prime_cache(0, &sieve); | 
| 207 | 4007 | 50 |  |  |  |  | prev = (n < sieve_size)  ?  prev_prime_in_sieve(sieve, n)  :  0; | 
| 208 |  |  |  |  |  |  | release_prime_cache(sieve); | 
| 209 | 4007 | 50 |  |  |  |  | if (prev != 0) return prev; | 
| 210 |  |  |  |  |  |  | } | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 4163 |  |  |  |  |  | m = n % 30; | 
| 213 |  |  |  |  |  |  | do { /* Move back one. */ | 
| 214 | 14268 |  |  |  |  |  | n -= wheelretreat30[m]; | 
| 215 | 14268 |  |  |  |  |  | m = prevwheel30[m]; | 
| 216 | 14268 | 100 |  |  |  |  | } while (!is_prob_prime(n)); | 
| 217 | 4163 |  |  |  |  |  | return n; | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | /******************************************************************************/ | 
| 221 |  |  |  |  |  |  | /*                                 PRINTING                                   */ | 
| 222 |  |  |  |  |  |  | /******************************************************************************/ | 
| 223 |  |  |  |  |  |  |  | 
| 224 | 0 |  |  |  |  |  | static int my_sprint(char* ptr, UV val) { | 
| 225 |  |  |  |  |  |  | int nchars; | 
| 226 |  |  |  |  |  |  | UV t; | 
| 227 | 0 |  |  |  |  |  | char *s = ptr; | 
| 228 |  |  |  |  |  |  | do { | 
| 229 | 0 |  |  |  |  |  | t = val / 10; *s++ = (char) ('0' + val - 10 * t); | 
| 230 | 0 | 0 |  |  |  |  | } while ((val = t)); | 
| 231 | 0 |  |  |  |  |  | nchars = s - ptr + 1;  *s = '\n'; | 
| 232 | 0 | 0 |  |  |  |  | while (--s > ptr) { char c = *s; *s = *ptr; *ptr++ = c; } | 
| 233 | 0 |  |  |  |  |  | return nchars; | 
| 234 |  |  |  |  |  |  | } | 
| 235 | 0 |  |  |  |  |  | static char* write_buf(int fd, char* buf, char* bend) { | 
| 236 | 0 |  |  |  |  |  | int res = (int) write(fd, buf, bend-buf); | 
| 237 | 0 | 0 |  |  |  |  | if (res == -1) croak("print_primes write error"); | 
| 238 | 0 |  |  |  |  |  | return buf; | 
| 239 |  |  |  |  |  |  | } | 
| 240 | 0 |  |  |  |  |  | void print_primes(UV low, UV high, int fd) { | 
| 241 |  |  |  |  |  |  | char buf[8000+25]; | 
| 242 | 0 |  |  |  |  |  | char* bend = buf; | 
| 243 | 0 | 0 |  |  |  |  | if ((low <= 2) && (high >= 2)) bend += my_sprint(bend,2); | 
|  |  | 0 |  |  |  |  |  | 
| 244 | 0 | 0 |  |  |  |  | if ((low <= 3) && (high >= 3)) bend += my_sprint(bend,3); | 
|  |  | 0 |  |  |  |  |  | 
| 245 | 0 | 0 |  |  |  |  | if ((low <= 5) && (high >= 5)) bend += my_sprint(bend,5); | 
|  |  | 0 |  |  |  |  |  | 
| 246 | 0 | 0 |  |  |  |  | if (low < 7) low = 7; | 
| 247 |  |  |  |  |  |  |  | 
| 248 | 0 | 0 |  |  |  |  | if (low <= high) { | 
| 249 |  |  |  |  |  |  | unsigned char* segment; | 
| 250 |  |  |  |  |  |  | UV seg_base, seg_low, seg_high; | 
| 251 | 0 |  |  |  |  |  | void* ctx = start_segment_primes(low, high, &segment); | 
| 252 | 0 | 0 |  |  |  |  | while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { | 
| 253 | 0 | 0 |  |  |  |  | START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 254 | 0 |  |  |  |  |  | bend += my_sprint(bend,p); | 
| 255 | 0 | 0 |  |  |  |  | if (bend-buf > 8000) { bend = write_buf(fd, buf, bend); } | 
| 256 | 0 |  |  |  |  |  | END_DO_FOR_EACH_SIEVE_PRIME | 
| 257 |  |  |  |  |  |  | } | 
| 258 | 0 |  |  |  |  |  | end_segment_primes(ctx); | 
| 259 |  |  |  |  |  |  | } | 
| 260 | 0 | 0 |  |  |  |  | if (bend > buf) { bend = write_buf(fd, buf, bend); } | 
| 261 | 0 |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | /* Return a char array with lo-hi+1 elements. mu[k-lo] = µ(k) for k = lo .. hi. | 
| 265 |  |  |  |  |  |  | * It is the callers responsibility to call Safefree on the result. */ | 
| 266 |  |  |  |  |  |  | #define PGTLO(p,lo)  ((p) >= lo) ? (p) : ((p)*(lo/(p)) + ((lo%(p))?(p):0)) | 
| 267 |  |  |  |  |  |  | #define P2GTLO(pinit, p, lo) \ | 
| 268 |  |  |  |  |  |  | ((pinit) >= lo) ? (pinit) : ((p)*(lo/(p)) + ((lo%(p))?(p):0)) | 
| 269 | 69 |  |  |  |  |  | signed char* _moebius_range(UV lo, UV hi) | 
| 270 |  |  |  |  |  |  | { | 
| 271 |  |  |  |  |  |  | signed char* mu; | 
| 272 |  |  |  |  |  |  | UV i; | 
| 273 | 69 |  |  |  |  |  | UV sqrtn = isqrt(hi); | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | /* Kuznetsov indicates that the Deléglise & Rivat (1996) method can be | 
| 276 |  |  |  |  |  |  | * modified to work on logs, which allows us to operate with no | 
| 277 |  |  |  |  |  |  | * intermediate memory at all.  Same time as the D&R method, less memory. */ | 
| 278 |  |  |  |  |  |  | unsigned char logp; | 
| 279 |  |  |  |  |  |  | UV nextlog; | 
| 280 |  |  |  |  |  |  |  | 
| 281 | 69 |  |  |  |  |  | Newz(0, mu, hi-lo+1, signed char); | 
| 282 | 69 | 100 |  |  |  |  | if (sqrtn*sqrtn != hi) sqrtn++;  /* ceil sqrtn */ | 
| 283 |  |  |  |  |  |  |  | 
| 284 | 69 |  |  |  |  |  | logp = 1; nextlog = 3; /* 2+1 */ | 
| 285 | 543 | 50 |  |  |  |  | START_DO_FOR_EACH_PRIME(2, sqrtn) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 286 | 474 |  |  |  |  |  | UV p2 = p*p; | 
| 287 | 474 | 100 |  |  |  |  | if (p > nextlog) { | 
| 288 | 88 |  |  |  |  |  | logp += 2;   /* logp is 1 | ceil(log(p)/log(2)) */ | 
| 289 | 88 |  |  |  |  |  | nextlog = ((nextlog-1)*4)+1; | 
| 290 |  |  |  |  |  |  | } | 
| 291 | 147483 | 50 |  |  |  |  | for (i = PGTLO(p, lo); i <= hi; i += p) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 292 | 147009 |  |  |  |  |  | mu[i-lo] += logp; | 
| 293 | 38243 | 50 |  |  |  |  | for (i = PGTLO(p2, lo); i <= hi; i += p2) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 294 | 37769 |  |  |  |  |  | mu[i-lo] = 0x80; | 
| 295 | 474 |  |  |  |  |  | } END_DO_FOR_EACH_PRIME | 
| 296 |  |  |  |  |  |  |  | 
| 297 | 69 | 100 |  |  |  |  | logp = log2floor(lo); | 
| 298 | 69 |  |  |  |  |  | nextlog = UVCONST(2) << logp; | 
| 299 | 84731 | 100 |  |  |  |  | for (i = lo; i <= hi; i++) { | 
| 300 | 84662 |  |  |  |  |  | unsigned char a = mu[i-lo]; | 
| 301 | 84662 | 100 |  |  |  |  | if (i >= nextlog) {  logp++;  nextlog *= 2;  } /* logp is log(p)/log(2) */ | 
| 302 | 84662 | 100 |  |  |  |  | if (a & 0x80)       { a = 0; } | 
| 303 | 51559 | 100 |  |  |  |  | else if (a >= logp) { a =  1 - 2*(a&1); } | 
| 304 | 39210 |  |  |  |  |  | else                { a = -1 + 2*(a&1); } | 
| 305 | 84662 |  |  |  |  |  | mu[i-lo] = a; | 
| 306 |  |  |  |  |  |  | } | 
| 307 | 69 | 100 |  |  |  |  | if (lo == 0)  mu[0] = 0; | 
| 308 |  |  |  |  |  |  |  | 
| 309 | 69 |  |  |  |  |  | return mu; | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  |  | 
| 312 | 8 |  |  |  |  |  | UV* _totient_range(UV lo, UV hi) { | 
| 313 |  |  |  |  |  |  | UV* totients; | 
| 314 |  |  |  |  |  |  | UV i, seg_base, seg_low, seg_high; | 
| 315 |  |  |  |  |  |  | unsigned char* segment; | 
| 316 |  |  |  |  |  |  | void* ctx; | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 8 | 50 |  |  |  |  | if (hi < lo) croak("_totient_range error hi %"UVuf" < lo %"UVuf"\n", hi, lo); | 
| 319 | 8 | 50 |  |  |  |  | New(0, totients, hi-lo+1, UV); | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | /* Do via factoring if very small or if we have a small range */ | 
| 322 | 8 | 100 |  |  |  |  | if (hi < 100 || (hi-lo) < 10 || hi/(hi-lo+1) > 1000) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 323 | 41 | 100 |  |  |  |  | for (i = lo; i <= hi; i++) | 
| 324 | 35 |  |  |  |  |  | totients[i-lo] = totient(i); | 
| 325 | 6 |  |  |  |  |  | return totients; | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | /* If doing a full sieve, do it monolithic.  Faster. */ | 
| 329 | 2 | 100 |  |  |  |  | if (lo == 0) { | 
| 330 |  |  |  |  |  |  | UV* prime; | 
| 331 | 1 |  |  |  |  |  | double loghi = log(hi); | 
| 332 | 1 | 50 |  |  |  |  | UV max_index = (hi < 67)     ? 18 | 
|  |  | 50 |  |  |  |  |  | 
| 333 | 1 |  |  |  |  |  | : (hi < 355991) ? 15+(hi/(loghi-1.09)) | 
| 334 | 0 |  |  |  |  |  | : (hi/loghi) * (1.0+1.0/loghi+2.51/(loghi*loghi)); | 
| 335 | 1 |  |  |  |  |  | UV j, index, nprimes = 0; | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 1 | 50 |  |  |  |  | New(0, prime, max_index, UV);  /* could use prime_count_upper(hi) */ | 
| 338 | 1 |  |  |  |  |  | memset(totients, 0, (hi-lo+1) * sizeof(UV)); | 
| 339 | 120 | 100 |  |  |  |  | for (i = 2; i <= hi/2; i++) { | 
| 340 | 119 |  |  |  |  |  | index = 2*i; | 
| 341 | 119 | 100 |  |  |  |  | if ( !(i&1) ) { | 
| 342 | 60 | 100 |  |  |  |  | if (i == 2) { totients[2] = 1; prime[nprimes++] = 2; } | 
| 343 | 60 |  |  |  |  |  | totients[index] = totients[i]*2; | 
| 344 |  |  |  |  |  |  | } else { | 
| 345 | 59 | 100 |  |  |  |  | if (totients[i] == 0) { | 
| 346 | 29 |  |  |  |  |  | totients[i] = i-1; | 
| 347 | 29 |  |  |  |  |  | prime[nprimes++] = i; | 
| 348 |  |  |  |  |  |  | } | 
| 349 | 167 | 50 |  |  |  |  | for (j=0; j < nprimes && index <= hi; index = i*prime[++j]) { | 
|  |  | 100 |  |  |  |  |  | 
| 350 | 127 | 100 |  |  |  |  | if (i % prime[j] == 0) { | 
| 351 | 19 |  |  |  |  |  | totients[index] = totients[i]*prime[j]; | 
| 352 | 19 |  |  |  |  |  | break; | 
| 353 |  |  |  |  |  |  | } else { | 
| 354 | 108 |  |  |  |  |  | totients[index] = totients[i]*(prime[j]-1); | 
| 355 |  |  |  |  |  |  | } | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  | } | 
| 358 |  |  |  |  |  |  | } | 
| 359 | 1 |  |  |  |  |  | Safefree(prime); | 
| 360 |  |  |  |  |  |  | /* All totient values have been filled in except the primes.  Mark them. */ | 
| 361 | 61 | 100 |  |  |  |  | for (i = ((hi/2) + 1) | 1; i <= hi; i += 2) | 
| 362 | 60 | 100 |  |  |  |  | if (totients[i] == 0) | 
| 363 | 22 |  |  |  |  |  | totients[i] = i-1; | 
| 364 | 1 |  |  |  |  |  | totients[1] = 1; | 
| 365 | 1 |  |  |  |  |  | totients[0] = 0; | 
| 366 | 1 |  |  |  |  |  | return totients; | 
| 367 |  |  |  |  |  |  | } | 
| 368 |  |  |  |  |  |  |  | 
| 369 | 26 | 100 |  |  |  |  | for (i = lo; i <= hi; i++) { | 
| 370 | 25 |  |  |  |  |  | UV v = i; | 
| 371 | 25 | 100 |  |  |  |  | if (i % 2 == 0)  v -= v/2; | 
| 372 | 25 | 100 |  |  |  |  | if (i % 3 == 0)  v -= v/3; | 
| 373 | 25 | 100 |  |  |  |  | if (i % 5 == 0)  v -= v/5; | 
| 374 | 25 |  |  |  |  |  | totients[i-lo] = v; | 
| 375 |  |  |  |  |  |  | } | 
| 376 |  |  |  |  |  |  |  | 
| 377 | 1 |  |  |  |  |  | ctx = start_segment_primes(7, hi/2, &segment); | 
| 378 | 2 | 100 |  |  |  |  | while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { | 
| 379 | 137 | 50 |  |  |  |  | START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 380 | 163 | 100 |  |  |  |  | for (i = P2GTLO(2*p,p,lo); i <= hi; i += p) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 381 | 31 |  |  |  |  |  | totients[i-lo] -= totients[i-lo]/p; | 
| 382 | 4 |  |  |  |  |  | } END_DO_FOR_EACH_SIEVE_PRIME | 
| 383 |  |  |  |  |  |  | } | 
| 384 | 1 |  |  |  |  |  | end_segment_primes(ctx); | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | /* Fill in all primes */ | 
| 387 | 14 | 100 |  |  |  |  | for (i = lo | 1; i <= hi; i += 2) | 
| 388 | 13 | 100 |  |  |  |  | if (totients[i-lo] == i) | 
| 389 | 2 |  |  |  |  |  | totients[i-lo]--; | 
| 390 | 1 | 50 |  |  |  |  | if (lo <= 1) totients[1-lo] = 1; | 
| 391 |  |  |  |  |  |  |  | 
| 392 | 8 |  |  |  |  |  | return totients; | 
| 393 |  |  |  |  |  |  | } | 
| 394 |  |  |  |  |  |  |  | 
| 395 | 45 |  |  |  |  |  | IV mertens(UV n) { | 
| 396 |  |  |  |  |  |  | /* See Deléglise and Rivat (1996) for O(n^2/3 log(log(n))^1/3) algorithm. | 
| 397 |  |  |  |  |  |  | * This implementation uses their lemma 2.1 directly, so is ~ O(n). | 
| 398 |  |  |  |  |  |  | * In serial it is quite a bit faster than segmented summation of mu | 
| 399 |  |  |  |  |  |  | * ranges, though the latter seems to be a favored method for GPUs. | 
| 400 |  |  |  |  |  |  | */ | 
| 401 |  |  |  |  |  |  | UV u, j, m, nmk, maxmu; | 
| 402 |  |  |  |  |  |  | signed char* mu; | 
| 403 |  |  |  |  |  |  | short* M;   /* 16 bits is enough range for all 32-bit M => 64-bit n */ | 
| 404 |  |  |  |  |  |  | IV sum; | 
| 405 |  |  |  |  |  |  |  | 
| 406 | 45 | 100 |  |  |  |  | if (n <= 1)  return n; | 
| 407 | 44 |  |  |  |  |  | u = isqrt(n); | 
| 408 | 44 |  |  |  |  |  | maxmu = (n/(u+1));              /* maxmu lets us handle u < sqrt(n) */ | 
| 409 | 44 | 100 |  |  |  |  | if (maxmu < u) maxmu = u; | 
| 410 | 44 |  |  |  |  |  | mu = _moebius_range(0, maxmu); | 
| 411 | 44 | 50 |  |  |  |  | New(0, M, maxmu+1, short); | 
| 412 | 44 |  |  |  |  |  | M[0] = 0; | 
| 413 | 56574 | 100 |  |  |  |  | for (j = 1; j <= maxmu; j++) | 
| 414 | 56530 |  |  |  |  |  | M[j] = M[j-1] + mu[j]; | 
| 415 | 44 |  |  |  |  |  | sum = M[u]; | 
| 416 | 56574 | 100 |  |  |  |  | for (m = 1; m <= u; m++) { | 
| 417 | 56530 | 100 |  |  |  |  | if (mu[m] != 0) { | 
| 418 | 34416 |  |  |  |  |  | IV inner_sum = 0; | 
| 419 | 34416 |  |  |  |  |  | UV lower = (u/m) + 1; | 
| 420 | 34416 |  |  |  |  |  | UV last_nmk = n/(m*lower); | 
| 421 | 34416 |  |  |  |  |  | UV this_k = 0; | 
| 422 | 34416 |  |  |  |  |  | UV next_k = n/(m*1); | 
| 423 | 34416 |  |  |  |  |  | UV nmkm = m * 2; | 
| 424 | 228957633 | 100 |  |  |  |  | for (nmk = 1; nmk <= last_nmk; nmk++, nmkm += m) { | 
| 425 | 228923217 |  |  |  |  |  | this_k = next_k; | 
| 426 | 228923217 |  |  |  |  |  | next_k = n/nmkm; | 
| 427 | 228923217 |  |  |  |  |  | inner_sum += M[nmk] * (this_k - next_k); | 
| 428 |  |  |  |  |  |  | } | 
| 429 | 34416 | 100 |  |  |  |  | sum += (mu[m] > 0) ? -inner_sum : inner_sum; | 
| 430 |  |  |  |  |  |  | } | 
| 431 |  |  |  |  |  |  | } | 
| 432 | 44 |  |  |  |  |  | Safefree(M); | 
| 433 | 44 |  |  |  |  |  | Safefree(mu); | 
| 434 | 44 |  |  |  |  |  | return sum; | 
| 435 |  |  |  |  |  |  | } | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | /* There are at least 4 ways to do this, plus hybrids. | 
| 438 |  |  |  |  |  |  | * 1) use a table.  Great for 32-bit, too big for 64-bit. | 
| 439 |  |  |  |  |  |  | * 2) Use pow() to check.  Relatively slow and FP is always dangerous. | 
| 440 |  |  |  |  |  |  | * 3) factor or trial factor.  Slow for 64-bit. | 
| 441 |  |  |  |  |  |  | * 4) Dietzfelbinger algorithm 2.3.5.  Quite slow. | 
| 442 |  |  |  |  |  |  | * This currently uses a hybrid of 1 and 2. | 
| 443 |  |  |  |  |  |  | */ | 
| 444 | 15427 |  |  |  |  |  | int powerof(UV n) { | 
| 445 |  |  |  |  |  |  | UV t; | 
| 446 | 15427 | 100 |  |  |  |  | if ((n <= 3) || (n == UV_MAX)) return 1; | 
|  |  | 50 |  |  |  |  |  | 
| 447 | 15343 | 100 |  |  |  |  | if ((n & (n-1)) == 0)          return ctz(n);  /* powers of 2    */ | 
|  |  | 50 |  |  |  |  |  | 
| 448 | 15326 | 100 |  |  |  |  | if (is_perfect_square(n))      return 2 * powerof(isqrt(n)); | 
| 449 | 15082 | 100 |  |  |  |  | if (is_perfect_cube(n))        return 3 * powerof(icbrt(n)); | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | /* Simple rejection filter for non-powers of 5-37.  Rejects 47.85%. */ | 
| 452 | 15032 | 100 |  |  |  |  | t = n & 511; if ((t*77855451) & (t*4598053) & 862)  return 1; | 
| 453 |  |  |  |  |  |  |  | 
| 454 | 6716 | 100 |  |  |  |  | if (is_perfect_fifth(n))       return 5 * powerof(rootof(n,5)); | 
| 455 | 6701 | 100 |  |  |  |  | if (is_perfect_seventh(n))     return 7 * powerof(rootof(n,7)); | 
| 456 |  |  |  |  |  |  |  | 
| 457 | 6694 | 100 |  |  |  |  | if (n > 177146 && n <= UVCONST(1977326743)) { | 
|  |  | 100 |  |  |  |  |  | 
| 458 | 1853 |  |  |  |  |  | switch (n) { /* Check for powers of 11, 13, 17, 19 within 32 bits */ | 
| 459 | 4 |  |  |  |  |  | case 177147: case 48828125: case 362797056: case 1977326743: return 11; | 
| 460 | 0 |  |  |  |  |  | case 1594323: case 1220703125: return 13; | 
| 461 | 0 |  |  |  |  |  | case 129140163: return 17; | 
| 462 | 0 |  |  |  |  |  | case 1162261467: return 19; | 
| 463 | 1849 |  |  |  |  |  | default:  break; | 
| 464 |  |  |  |  |  |  | } | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  | #if BITS_PER_WORD == 64 | 
| 467 | 6690 | 100 |  |  |  |  | if (n >= UVCONST(8589934592)) { | 
| 468 |  |  |  |  |  |  | /* The Bloom filters reject about 90% of inputs each, about 99% for two. | 
| 469 |  |  |  |  |  |  | * Bach/Sorenson type sieves do about as well, but are much slower due | 
| 470 |  |  |  |  |  |  | * to using a powmod. */ | 
| 471 | 218 | 100 |  |  |  |  | if ( (t = n %121, !((t*19706187) & (t*61524433) & 876897796)) && | 
|  |  | 50 |  |  |  |  |  | 
| 472 | 2 |  |  |  |  |  | (t = n % 89, !((t*28913398) & (t*69888189) & 2705511937U)) ) { | 
| 473 |  |  |  |  |  |  | /* (t = n % 67, !((t*117621317) & (t*48719734) & 537242019)) ) { */ | 
| 474 | 0 |  |  |  |  |  | UV root = rootof(n,11); | 
| 475 | 0 | 0 |  |  |  |  | if (n == ipow(root,11)) return 11; | 
| 476 |  |  |  |  |  |  | } | 
| 477 | 218 | 100 |  |  |  |  | if ( (t = n %131, !((t*1545928325) & (t*1355660813) & 2771533888U)) && | 
|  |  | 100 |  |  |  |  |  | 
| 478 | 34 |  |  |  |  |  | (t = n % 79, !((t*48902028) & (t*48589927) & 404082779)) ) { | 
| 479 |  |  |  |  |  |  | /* (t = n % 53, !((t*79918293) & (t*236846524) & 694943819)) ) { */ | 
| 480 | 2 |  |  |  |  |  | UV root = rootof(n,13); | 
| 481 | 2 | 50 |  |  |  |  | if (n == ipow(root,13)) return 13; | 
| 482 |  |  |  |  |  |  | } | 
| 483 | 216 |  |  |  |  |  | switch (n) { | 
| 484 |  |  |  |  |  |  | case UVCONST(762939453125): | 
| 485 |  |  |  |  |  |  | case UVCONST(16926659444736): | 
| 486 |  |  |  |  |  |  | case UVCONST(232630513987207): | 
| 487 |  |  |  |  |  |  | case UVCONST(100000000000000000): | 
| 488 |  |  |  |  |  |  | case UVCONST(505447028499293771): | 
| 489 |  |  |  |  |  |  | case UVCONST(2218611106740436992): | 
| 490 | 5 |  |  |  |  |  | case UVCONST(8650415919381337933):  return 17; | 
| 491 |  |  |  |  |  |  | case UVCONST(19073486328125): | 
| 492 |  |  |  |  |  |  | case UVCONST(609359740010496): | 
| 493 |  |  |  |  |  |  | case UVCONST(11398895185373143): | 
| 494 | 4 |  |  |  |  |  | case UVCONST(10000000000000000000): return 19; | 
| 495 |  |  |  |  |  |  | case UVCONST(94143178827): | 
| 496 |  |  |  |  |  |  | case UVCONST(11920928955078125): | 
| 497 | 1 |  |  |  |  |  | case UVCONST(789730223053602816):   return 23; | 
| 498 | 0 |  |  |  |  |  | case UVCONST(68630377364883):       return 29; | 
| 499 | 0 |  |  |  |  |  | case UVCONST(617673396283947):      return 31; | 
| 500 | 0 |  |  |  |  |  | case UVCONST(450283905890997363):   return 37; | 
| 501 | 206 |  |  |  |  |  | default:  break; | 
| 502 |  |  |  |  |  |  | } | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  | #endif | 
| 505 | 6678 |  |  |  |  |  | return 1; | 
| 506 |  |  |  |  |  |  | } | 
| 507 | 10465 |  |  |  |  |  | int is_power(UV n, UV a) | 
| 508 |  |  |  |  |  |  | { | 
| 509 |  |  |  |  |  |  | int ret; | 
| 510 | 10465 | 100 |  |  |  |  | if (a > 0) { | 
| 511 | 22 | 50 |  |  |  |  | if (a == 1 || n <= 1) return 1; | 
|  |  | 100 |  |  |  |  |  | 
| 512 | 19 | 100 |  |  |  |  | if ((a % 2) == 0) | 
| 513 | 17 | 100 |  |  |  |  | return !is_perfect_square(n) ? 0 : (a == 2) ? 1 : is_power(isqrt(n),a>>1); | 
|  |  | 50 |  |  |  |  |  | 
| 514 | 2 | 50 |  |  |  |  | if ((a % 3) == 0) | 
| 515 | 2 | 50 |  |  |  |  | return !is_perfect_cube(n) ? 0 : (a == 3) ? 1 : is_power(icbrt(n),a/3); | 
|  |  | 50 |  |  |  |  |  | 
| 516 | 0 | 0 |  |  |  |  | if ((a % 5) == 0) | 
| 517 | 0 | 0 |  |  |  |  | return !is_perfect_fifth(n) ? 0 : (a == 5) ? 1 :is_power(rootof(n,5),a/5); | 
|  |  | 0 |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | } | 
| 519 | 10443 |  |  |  |  |  | ret = powerof(n); | 
| 520 | 10443 | 50 |  |  |  |  | if (a != 0) return !(ret % a);  /* Is the max power divisible by a? */ | 
| 521 | 10443 | 100 |  |  |  |  | return (ret == 1) ? 0 : ret; | 
| 522 |  |  |  |  |  |  | } | 
| 523 |  |  |  |  |  |  |  | 
| 524 |  |  |  |  |  |  | #if BITS_PER_WORD == 64 | 
| 525 |  |  |  |  |  |  | #define ROOT_MAX_3 41 | 
| 526 |  |  |  |  |  |  | static const uint32_t root_max[ROOT_MAX_3] = {0,0,0,2642245,65535,7131,1625,565,255,138,84,56,40,30,23,19,15,13,11,10,9,8,7,6,6,5,5,5,4,4,4,4,3,3,3,3,3,3,3,3,3}; | 
| 527 |  |  |  |  |  |  | #else | 
| 528 |  |  |  |  |  |  | #define ROOT_MAX_3 21 | 
| 529 |  |  |  |  |  |  | static const uint32_t root_max[ROOT_MAX_3] = {0,0,0,1625,255,84,40,23,15,11,9,7,6,5,4,4,3,3,3,3,3}; | 
| 530 |  |  |  |  |  |  | #endif | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 462 |  |  |  |  |  | UV rootof(UV n, UV k) { | 
| 533 |  |  |  |  |  |  | UV lo, hi, max; | 
| 534 | 462 | 50 |  |  |  |  | if (k == 0) return 0; | 
| 535 | 462 | 100 |  |  |  |  | if (k == 1) return n; | 
| 536 | 443 | 100 |  |  |  |  | if (k == 2) return isqrt(n); | 
| 537 | 327 | 100 |  |  |  |  | if (k == 3) return icbrt(n); | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | /* Bracket between powers of 2, but never exceed max power so ipow works */ | 
| 540 | 318 | 100 |  |  |  |  | max = 1 + ((k >= ROOT_MAX_3) ? 2 : root_max[k]); | 
| 541 | 318 | 50 |  |  |  |  | lo = UVCONST(1) << (log2floor(n)/k); | 
| 542 | 318 |  |  |  |  |  | hi = ((lo*2) < max) ? lo*2 : max; | 
| 543 |  |  |  |  |  |  |  | 
| 544 |  |  |  |  |  |  | /* Binary search */ | 
| 545 | 1116 | 100 |  |  |  |  | while (lo < hi) { | 
| 546 | 798 |  |  |  |  |  | UV mid = lo + (hi-lo)/2; | 
| 547 | 798 | 100 |  |  |  |  | if (ipow(mid,k) <= n) lo = mid+1; | 
| 548 | 355 |  |  |  |  |  | else                  hi = mid; | 
| 549 |  |  |  |  |  |  | } | 
| 550 | 318 |  |  |  |  |  | return lo-1; | 
| 551 |  |  |  |  |  |  | } | 
| 552 |  |  |  |  |  |  |  | 
| 553 | 10274 |  |  |  |  |  | int primepower(UV n, UV* prime) | 
| 554 |  |  |  |  |  |  | { | 
| 555 | 10274 |  |  |  |  |  | int power = 0; | 
| 556 | 10274 | 100 |  |  |  |  | if (n < 2) return 0; | 
| 557 |  |  |  |  |  |  | /* Check for small divisors */ | 
| 558 | 10268 | 100 |  |  |  |  | if (!(n&1)) { | 
| 559 | 24 | 100 |  |  |  |  | if (n & (n-1)) return 0; | 
| 560 | 9 |  |  |  |  |  | *prime = 2; | 
| 561 | 9 | 50 |  |  |  |  | return ctz(n); | 
| 562 |  |  |  |  |  |  | } | 
| 563 | 10244 | 100 |  |  |  |  | if ((n%3) == 0) { | 
| 564 |  |  |  |  |  |  | /* if (UVCONST(12157665459056928801) % n) return 0; */ | 
| 565 | 156 | 100 |  |  |  |  | do { n /= 3; power++; } while (n > 1 && (n%3) == 0); | 
|  |  | 100 |  |  |  |  |  | 
| 566 | 15 | 100 |  |  |  |  | if (n != 1) return 0; | 
| 567 | 11 |  |  |  |  |  | *prime = 3; | 
| 568 | 11 |  |  |  |  |  | return power; | 
| 569 |  |  |  |  |  |  | } | 
| 570 | 10229 | 100 |  |  |  |  | if ((n%5) == 0) { | 
| 571 | 2573 | 100 |  |  |  |  | do { n /= 5; power++; } while (n > 1 && (n%5) == 0); | 
|  |  | 100 |  |  |  |  |  | 
| 572 | 2007 | 100 |  |  |  |  | if (n != 1) return 0; | 
| 573 | 10 |  |  |  |  |  | *prime = 5; | 
| 574 | 10 |  |  |  |  |  | return power; | 
| 575 |  |  |  |  |  |  | } | 
| 576 | 8222 | 100 |  |  |  |  | if ((n%7) == 0) { | 
| 577 | 1403 | 100 |  |  |  |  | do { n /= 7; power++; } while (n > 1 && (n%7) == 0); | 
|  |  | 100 |  |  |  |  |  | 
| 578 | 1149 | 100 |  |  |  |  | if (n != 1) return 0; | 
| 579 | 9 |  |  |  |  |  | *prime = 7; | 
| 580 | 9 |  |  |  |  |  | return power; | 
| 581 |  |  |  |  |  |  | } | 
| 582 | 7073 | 100 |  |  |  |  | if (is_prob_prime(n)) | 
| 583 | 2690 |  |  |  |  |  | { *prime = n; return 1; } | 
| 584 |  |  |  |  |  |  | /* Composite.  Test for perfect power with prime root. */ | 
| 585 | 4383 |  |  |  |  |  | power = powerof(n); | 
| 586 | 4383 | 100 |  |  |  |  | if (power == 1) power = 0; | 
| 587 | 4383 | 100 |  |  |  |  | if (power) { | 
| 588 | 121 |  |  |  |  |  | UV root = rootof(n, (UV)power); | 
| 589 | 121 | 100 |  |  |  |  | if (is_prob_prime(root)) | 
| 590 | 103 |  |  |  |  |  | *prime = root; | 
| 591 |  |  |  |  |  |  | else | 
| 592 | 18 |  |  |  |  |  | power = 0; | 
| 593 |  |  |  |  |  |  | } | 
| 594 | 4383 |  |  |  |  |  | return power; | 
| 595 |  |  |  |  |  |  | } | 
| 596 |  |  |  |  |  |  |  | 
| 597 | 65 |  |  |  |  |  | UV valuation(UV n, UV k) | 
| 598 |  |  |  |  |  |  | { | 
| 599 | 65 |  |  |  |  |  | UV v = 0; | 
| 600 | 65 |  |  |  |  |  | UV kpower = k; | 
| 601 | 65 | 100 |  |  |  |  | if (k < 2 || n < 2) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 602 | 55 | 100 |  |  |  |  | if (k == 2) return ctz(n); | 
|  |  | 50 |  |  |  |  |  | 
| 603 | 4 | 100 |  |  |  |  | while ( !(n % kpower) ) { | 
| 604 | 3 |  |  |  |  |  | kpower *= k; | 
| 605 | 3 |  |  |  |  |  | v++; | 
| 606 |  |  |  |  |  |  | } | 
| 607 | 1 |  |  |  |  |  | return v; | 
| 608 |  |  |  |  |  |  | } | 
| 609 |  |  |  |  |  |  |  | 
| 610 | 601 |  |  |  |  |  | UV logint(UV n, UV b) | 
| 611 |  |  |  |  |  |  | { | 
| 612 |  |  |  |  |  |  | /* UV e;  for (e=0; n; n /= b) e++;  return e-1; */ | 
| 613 | 601 |  |  |  |  |  | UV v, e = 0; | 
| 614 | 601 | 100 |  |  |  |  | if (b == 2) | 
| 615 | 200 | 50 |  |  |  |  | return log2floor(n); | 
| 616 | 401 | 50 |  |  |  |  | if (n > UV_MAX/b) { | 
| 617 | 0 |  |  |  |  |  | n /= b; | 
| 618 | 0 |  |  |  |  |  | e = 1; | 
| 619 |  |  |  |  |  |  | } | 
| 620 | 1541 | 100 |  |  |  |  | for (v = b; v <= n; v *= b) | 
| 621 | 1140 |  |  |  |  |  | e++; | 
| 622 | 401 |  |  |  |  |  | return e; | 
| 623 |  |  |  |  |  |  | } | 
| 624 |  |  |  |  |  |  |  | 
| 625 | 2 |  |  |  |  |  | UV mpu_popcount_string(const char* ptr, uint32_t len) | 
| 626 |  |  |  |  |  |  | { | 
| 627 | 2 |  |  |  |  |  | uint32_t count = 0, i, j, d, v, power, slen, *s, *sptr; | 
| 628 |  |  |  |  |  |  |  | 
| 629 | 2 | 50 |  |  |  |  | while (len > 0 && (*ptr == '0' || *ptr == '+' || *ptr == '-')) | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 630 | 0 |  |  |  |  |  | {  ptr++;  len--;  } | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | /* Create s as array of base 10^8 numbers */ | 
| 633 | 2 |  |  |  |  |  | slen = (len + 7) / 8; | 
| 634 | 2 | 50 |  |  |  |  | Newz(0, s, slen, uint32_t); | 
| 635 | 19 | 100 |  |  |  |  | for (i = 0; i < slen; i++) {  /* Chunks of 8 digits */ | 
| 636 | 145 | 100 |  |  |  |  | for (j = 0, d = 0, power = 1;  j < 8 && len > 0;  j++, power *= 10) { | 
|  |  | 100 |  |  |  |  |  | 
| 637 | 128 |  |  |  |  |  | v = ptr[--len] - '0'; | 
| 638 | 128 | 50 |  |  |  |  | if (v > 9) croak("Parameter '%s' must be a positive integer",ptr); | 
| 639 | 128 |  |  |  |  |  | d += power * v; | 
| 640 |  |  |  |  |  |  | } | 
| 641 | 17 |  |  |  |  |  | s[slen - 1 - i] = d; | 
| 642 |  |  |  |  |  |  | } | 
| 643 |  |  |  |  |  |  | /* Repeatedly count and divide by 2 across s */ | 
| 644 | 374 | 100 |  |  |  |  | while (slen > 1) { | 
| 645 | 372 | 100 |  |  |  |  | if (s[slen-1] & 1)  count++; | 
| 646 | 372 |  |  |  |  |  | sptr = s; | 
| 647 | 372 | 100 |  |  |  |  | if (s[0] == 1) { | 
| 648 | 15 | 50 |  |  |  |  | if (--slen == 0) break; | 
| 649 | 15 |  |  |  |  |  | *++sptr += 100000000; | 
| 650 |  |  |  |  |  |  | } | 
| 651 | 2293 | 100 |  |  |  |  | for (i = 0; i < slen; i++) { | 
| 652 | 1921 | 100 |  |  |  |  | if ( (i+1) < slen  &&  sptr[i] & 1 ) sptr[i+1] += 100000000; | 
|  |  | 100 |  |  |  |  |  | 
| 653 | 1921 |  |  |  |  |  | s[i] = sptr[i] >> 1; | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  | } | 
| 656 |  |  |  |  |  |  | /* For final base 10^8 number just do naive popcnt */ | 
| 657 | 56 | 100 |  |  |  |  | for (d = s[0]; d > 0; d >>= 1) | 
| 658 | 54 | 100 |  |  |  |  | if (d & 1) | 
| 659 | 25 |  |  |  |  |  | count++; | 
| 660 | 2 |  |  |  |  |  | Safefree(s); | 
| 661 | 2 |  |  |  |  |  | return count; | 
| 662 |  |  |  |  |  |  | } | 
| 663 |  |  |  |  |  |  |  | 
| 664 |  |  |  |  |  |  |  | 
| 665 |  |  |  |  |  |  | /* How many times does 2 divide n? */ | 
| 666 |  |  |  |  |  |  | #define padic2(n)  ctz(n) | 
| 667 |  |  |  |  |  |  | #define IS_MOD8_3OR5(x)  (((x)&7)==3 || ((x)&7)==5) | 
| 668 |  |  |  |  |  |  |  | 
| 669 | 1275 |  |  |  |  |  | static int kronecker_uu_sign(UV a, UV b, int s) { | 
| 670 | 4876 | 100 |  |  |  |  | while (a) { | 
| 671 | 3601 | 50 |  |  |  |  | int r = padic2(a); | 
| 672 | 3601 | 100 |  |  |  |  | if (r) { | 
| 673 | 1623 | 100 |  |  |  |  | if ((r&1)  &&  IS_MOD8_3OR5(b))  s = -s; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 674 | 1623 |  |  |  |  |  | a >>= r; | 
| 675 |  |  |  |  |  |  | } | 
| 676 | 3601 | 100 |  |  |  |  | if (a & b & 2)  s = -s; | 
| 677 | 3601 |  |  |  |  |  | { UV t = b % a;  b = a;  a = t; } | 
| 678 |  |  |  |  |  |  | } | 
| 679 | 1275 | 100 |  |  |  |  | return (b == 1) ? s : 0; | 
| 680 |  |  |  |  |  |  | } | 
| 681 |  |  |  |  |  |  |  | 
| 682 | 1281 |  |  |  |  |  | int kronecker_uu(UV a, UV b) { | 
| 683 |  |  |  |  |  |  | int r, s; | 
| 684 | 1281 | 100 |  |  |  |  | if (b & 1)   return kronecker_uu_sign(a, b, 1); | 
| 685 | 42 | 100 |  |  |  |  | if (!(a&1))  return 0; | 
| 686 | 24 |  |  |  |  |  | s = 1; | 
| 687 | 24 | 100 |  |  |  |  | r = padic2(b); | 
| 688 | 24 | 50 |  |  |  |  | if (r) { | 
| 689 | 24 | 100 |  |  |  |  | if ((r&1) && IS_MOD8_3OR5(a))  s = -s; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 690 | 24 |  |  |  |  |  | b >>= r; | 
| 691 |  |  |  |  |  |  | } | 
| 692 | 24 |  |  |  |  |  | return kronecker_uu_sign(a, b, s); | 
| 693 |  |  |  |  |  |  | } | 
| 694 |  |  |  |  |  |  |  | 
| 695 | 49 |  |  |  |  |  | int kronecker_su(IV a, UV b) { | 
| 696 |  |  |  |  |  |  | int r, s; | 
| 697 | 49 | 100 |  |  |  |  | if (a >= 0)  return kronecker_uu(a, b); | 
| 698 | 14 | 100 |  |  |  |  | if (b == 0)  return (a == 1 || a == -1) ? 1 : 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 699 | 12 |  |  |  |  |  | s = 1; | 
| 700 | 12 | 50 |  |  |  |  | r = padic2(b); | 
| 701 | 12 | 100 |  |  |  |  | if (r) { | 
| 702 | 2 | 50 |  |  |  |  | if (!(a&1))  return 0; | 
| 703 | 2 | 50 |  |  |  |  | if ((r&1) && IS_MOD8_3OR5(a))  s = -s; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 704 | 2 |  |  |  |  |  | b >>= r; | 
| 705 |  |  |  |  |  |  | } | 
| 706 | 12 |  |  |  |  |  | a %= (IV) b; | 
| 707 | 12 | 100 |  |  |  |  | if (a < 0)  a += b; | 
| 708 | 12 |  |  |  |  |  | return kronecker_uu_sign(a, b, s); | 
| 709 |  |  |  |  |  |  | } | 
| 710 |  |  |  |  |  |  |  | 
| 711 | 18 |  |  |  |  |  | int kronecker_ss(IV a, IV b) { | 
| 712 | 18 | 100 |  |  |  |  | if (a >= 0 && b >= 0) | 
|  |  | 50 |  |  |  |  |  | 
| 713 | 0 | 0 |  |  |  |  | return (b & 1)  ?  kronecker_uu_sign(a, b, 1)  :  kronecker_uu(a,b); | 
| 714 | 18 | 100 |  |  |  |  | if (b >= 0) | 
| 715 | 7 |  |  |  |  |  | return kronecker_su(a, b); | 
| 716 | 11 | 100 |  |  |  |  | return kronecker_su(a, -b) * ((a < 0) ? -1 : 1); | 
| 717 |  |  |  |  |  |  | } | 
| 718 |  |  |  |  |  |  |  | 
| 719 | 2752 |  |  |  |  |  | UV factorial(UV n) { | 
| 720 | 2752 |  |  |  |  |  | UV i, r = 1; | 
| 721 | 2752 | 100 |  |  |  |  | if ( (n > 12 && sizeof(UV) <= 4) || (n > 20 && sizeof(UV) <= 8) ) return 0; | 
| 722 | 16151 | 100 |  |  |  |  | for (i = 2; i <= n; i++) | 
| 723 | 13871 |  |  |  |  |  | r *= i; | 
| 724 | 2280 |  |  |  |  |  | return r; | 
| 725 |  |  |  |  |  |  | } | 
| 726 |  |  |  |  |  |  |  | 
| 727 | 25124 |  |  |  |  |  | UV binomial(UV n, UV k) {    /* Thanks to MJD and RosettaCode for ideas */ | 
| 728 | 25124 |  |  |  |  |  | UV d, g, r = 1; | 
| 729 | 25124 | 100 |  |  |  |  | if (k == 0) return 1; | 
| 730 | 24938 | 100 |  |  |  |  | if (k == 1) return n; | 
| 731 | 22904 | 100 |  |  |  |  | if (k >= n) return (k == n); | 
| 732 | 21351 | 100 |  |  |  |  | if (k > n/2) k = n-k; | 
| 733 | 213307 | 100 |  |  |  |  | for (d = 1; d <= k; d++) { | 
| 734 | 197192 | 100 |  |  |  |  | if (r >= UV_MAX/n) {  /* Possible overflow */ | 
| 735 |  |  |  |  |  |  | UV nr, dr;  /* reduced numerator / denominator */ | 
| 736 | 14824 |  |  |  |  |  | g = gcd_ui(n, d);  nr = n/g;  dr = d/g; | 
| 737 | 14824 |  |  |  |  |  | g = gcd_ui(r, dr);  r = r/g;  dr = dr/g; | 
| 738 | 14824 | 100 |  |  |  |  | if (r >= UV_MAX/nr) return 0;  /* Unavoidable overflow */ | 
| 739 | 9588 |  |  |  |  |  | r *= nr; | 
| 740 | 9588 |  |  |  |  |  | r /= dr; | 
| 741 | 9588 |  |  |  |  |  | n--; | 
| 742 |  |  |  |  |  |  | } else { | 
| 743 | 182368 |  |  |  |  |  | r *= n--; | 
| 744 | 182368 |  |  |  |  |  | r /= d; | 
| 745 |  |  |  |  |  |  | } | 
| 746 |  |  |  |  |  |  | } | 
| 747 | 16115 |  |  |  |  |  | return r; | 
| 748 |  |  |  |  |  |  | } | 
| 749 |  |  |  |  |  |  |  | 
| 750 | 190 |  |  |  |  |  | UV stirling3(UV n, UV m) {   /* Lah numbers */ | 
| 751 |  |  |  |  |  |  | UV f1, f2; | 
| 752 |  |  |  |  |  |  |  | 
| 753 | 190 | 50 |  |  |  |  | if (m == n) return 1; | 
| 754 | 190 | 50 |  |  |  |  | if (n == 0 || m == 0 || m > n) return 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 755 | 190 | 100 |  |  |  |  | if (m == 1) return factorial(n); | 
| 756 |  |  |  |  |  |  |  | 
| 757 | 171 |  |  |  |  |  | f1 = binomial(n, m); | 
| 758 | 171 | 50 |  |  |  |  | if (f1 == 0)  return 0; | 
| 759 | 171 |  |  |  |  |  | f2 = binomial(n-1, m-1); | 
| 760 | 171 | 50 |  |  |  |  | if (f2 == 0 || f1 >= UV_MAX/f2)  return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 761 | 171 |  |  |  |  |  | f1 *= f2; | 
| 762 | 171 |  |  |  |  |  | f2 = factorial(n-m); | 
| 763 | 171 | 50 |  |  |  |  | if (f2 == 0 || f1 >= UV_MAX/f2)  return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 764 | 166 |  |  |  |  |  | return f1 * f2; | 
| 765 |  |  |  |  |  |  | } | 
| 766 |  |  |  |  |  |  |  | 
| 767 | 1789 |  |  |  |  |  | IV stirling2(UV n, UV m) { | 
| 768 |  |  |  |  |  |  | UV f; | 
| 769 | 1789 |  |  |  |  |  | IV j, k, t, s = 0; | 
| 770 |  |  |  |  |  |  |  | 
| 771 | 1789 | 50 |  |  |  |  | if (m == n) return 1; | 
| 772 | 1789 | 50 |  |  |  |  | if (n == 0 || m == 0 || m > n) return 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 773 | 1789 | 100 |  |  |  |  | if (m == 1) return 1; | 
| 774 |  |  |  |  |  |  |  | 
| 775 | 1549 | 100 |  |  |  |  | if ((f = factorial(m)) == 0) return 0; | 
| 776 | 7661 | 100 |  |  |  |  | for (j = 1; j <= (IV)m; j++) { | 
| 777 | 6586 |  |  |  |  |  | t = binomial(m, j); | 
| 778 | 121380 | 100 |  |  |  |  | for (k = 1; k <= (IV)n; k++) { | 
| 779 | 115103 | 50 |  |  |  |  | if (t == 0 || j >= IV_MAX/t) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 780 | 114794 |  |  |  |  |  | t *= j; | 
| 781 |  |  |  |  |  |  | } | 
| 782 | 6277 | 100 |  |  |  |  | if ((m-j) & 1) t *= -1; | 
| 783 | 6277 |  |  |  |  |  | s += t; | 
| 784 |  |  |  |  |  |  | } | 
| 785 | 1075 |  |  |  |  |  | return s/f; | 
| 786 |  |  |  |  |  |  | } | 
| 787 |  |  |  |  |  |  |  | 
| 788 | 192 |  |  |  |  |  | IV stirling1(UV n, UV m) { | 
| 789 | 192 |  |  |  |  |  | IV k, t, b1, b2, s2, s = 0; | 
| 790 |  |  |  |  |  |  |  | 
| 791 | 192 | 50 |  |  |  |  | if (m == n) return 1; | 
| 792 | 192 | 50 |  |  |  |  | if (n == 0 || m == 0 || m > n) return 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 793 | 192 | 100 |  |  |  |  | if (m == 1) { | 
| 794 | 19 |  |  |  |  |  | UV f = factorial(n-1); | 
| 795 | 19 | 50 |  |  |  |  | if (f>(UV)IV_MAX) return 0; | 
| 796 | 19 | 100 |  |  |  |  | return (n&1) ? ((IV)f) : -((IV)f); | 
| 797 |  |  |  |  |  |  | } | 
| 798 |  |  |  |  |  |  |  | 
| 799 | 942 | 100 |  |  |  |  | for (k = 1; k <= (IV)(n-m); k++) { | 
| 800 | 816 |  |  |  |  |  | b1 = binomial(k + n - 1, n - m + k); | 
| 801 | 816 |  |  |  |  |  | b2 = binomial(2 * n - m, n - m - k); | 
| 802 | 816 |  |  |  |  |  | s2 = stirling2(n - m + k, k); | 
| 803 | 816 | 100 |  |  |  |  | if (b1 == 0 || b2 == 0 || s2 == 0 || b1 > IV_MAX/b2) return 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 804 | 804 |  |  |  |  |  | t = b1 * b2; | 
| 805 | 804 | 100 |  |  |  |  | if (s2 > IV_MAX/t) return 0; | 
| 806 | 769 |  |  |  |  |  | t *= s2; | 
| 807 | 769 | 100 |  |  |  |  | s += (k & 1) ? -t : t; | 
| 808 |  |  |  |  |  |  | } | 
| 809 | 126 |  |  |  |  |  | return s; | 
| 810 |  |  |  |  |  |  | } | 
| 811 |  |  |  |  |  |  |  | 
| 812 | 710 |  |  |  |  |  | UV totient(UV n) { | 
| 813 |  |  |  |  |  |  | UV i, nfacs, totient, lastf, facs[MPU_MAX_FACTORS+1]; | 
| 814 | 710 | 100 |  |  |  |  | if (n <= 1) return n; | 
| 815 | 614 |  |  |  |  |  | totient = 1; | 
| 816 |  |  |  |  |  |  | /* phi(2m) = 2phi(m) if m even, phi(m) if m odd */ | 
| 817 | 758 | 100 |  |  |  |  | while ((n & 0x3) == 0) { n >>= 1; totient <<= 1; } | 
| 818 | 614 | 100 |  |  |  |  | if ((n & 0x1) == 0) { n >>= 1; } | 
| 819 |  |  |  |  |  |  | /* factor and calculate totient */ | 
| 820 | 614 |  |  |  |  |  | nfacs = factor(n, facs); | 
| 821 | 614 |  |  |  |  |  | lastf = 0; | 
| 822 | 1263 | 100 |  |  |  |  | for (i = 0; i < nfacs; i++) { | 
| 823 | 649 |  |  |  |  |  | UV f = facs[i]; | 
| 824 | 649 | 100 |  |  |  |  | if (f == lastf) { totient *= f;               } | 
| 825 | 601 |  |  |  |  |  | else            { totient *= f-1;  lastf = f; } | 
| 826 |  |  |  |  |  |  | } | 
| 827 | 710 |  |  |  |  |  | return totient; | 
| 828 |  |  |  |  |  |  | } | 
| 829 |  |  |  |  |  |  |  | 
| 830 |  |  |  |  |  |  | static const UV jordan_overflow[5] = | 
| 831 |  |  |  |  |  |  | #if BITS_PER_WORD == 64 | 
| 832 |  |  |  |  |  |  | {UVCONST(4294967311), 2642249, 65537, 7133, 1627}; | 
| 833 |  |  |  |  |  |  | #else | 
| 834 |  |  |  |  |  |  | {UVCONST(     65537),    1627,   257,   85,   41}; | 
| 835 |  |  |  |  |  |  | #endif | 
| 836 | 490 |  |  |  |  |  | UV jordan_totient(UV k, UV n) { | 
| 837 |  |  |  |  |  |  | UV factors[MPU_MAX_FACTORS+1]; | 
| 838 |  |  |  |  |  |  | int nfac, i; | 
| 839 |  |  |  |  |  |  | UV totient; | 
| 840 | 490 | 50 |  |  |  |  | if (k == 0 || n <= 1) return (n == 1); | 
|  |  | 100 |  |  |  |  |  | 
| 841 | 479 | 100 |  |  |  |  | if (k > 6 || (k > 1 && n >= jordan_overflow[k-2])) return 0; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 842 |  |  |  |  |  |  |  | 
| 843 | 457 |  |  |  |  |  | totient = 1; | 
| 844 |  |  |  |  |  |  | /* Similar to Euler totient, shortcut even inputs */ | 
| 845 | 662 | 100 |  |  |  |  | while ((n & 0x3) == 0) { n >>= 1; totient *= (1< | 
| 846 | 457 | 100 |  |  |  |  | if ((n & 0x1) == 0) { n >>= 1; totient *= ((1< | 
| 847 | 457 |  |  |  |  |  | nfac = factor(n,factors); | 
| 848 | 960 | 100 |  |  |  |  | for (i = 0; i < nfac; i++) { | 
| 849 | 503 |  |  |  |  |  | UV p = factors[i]; | 
| 850 | 503 |  |  |  |  |  | UV pk = ipow(p,k); | 
| 851 | 503 |  |  |  |  |  | totient *= (pk-1); | 
| 852 | 580 | 100 |  |  |  |  | while (i+1 < nfac && p == factors[i+1]) { | 
|  |  | 100 |  |  |  |  |  | 
| 853 | 77 |  |  |  |  |  | i++; | 
| 854 | 77 |  |  |  |  |  | totient *= pk; | 
| 855 |  |  |  |  |  |  | } | 
| 856 |  |  |  |  |  |  | } | 
| 857 | 490 |  |  |  |  |  | return totient; | 
| 858 |  |  |  |  |  |  | } | 
| 859 |  |  |  |  |  |  |  | 
| 860 | 270 |  |  |  |  |  | UV carmichael_lambda(UV n) { | 
| 861 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1]; | 
| 862 |  |  |  |  |  |  | int i, nfactors; | 
| 863 | 270 |  |  |  |  |  | UV lambda = 1; | 
| 864 |  |  |  |  |  |  |  | 
| 865 | 270 | 100 |  |  |  |  | if (n < 8) return totient(n); | 
| 866 | 262 | 100 |  |  |  |  | if ((n & (n-1)) == 0) return n >> 2; | 
| 867 |  |  |  |  |  |  |  | 
| 868 | 253 | 50 |  |  |  |  | i = ctz(n); | 
| 869 | 253 | 100 |  |  |  |  | if (i > 0) { | 
| 870 | 95 |  |  |  |  |  | n >>= i; | 
| 871 | 95 | 100 |  |  |  |  | lambda <<= (i>2) ? i-2 : i-1; | 
| 872 |  |  |  |  |  |  | } | 
| 873 | 253 |  |  |  |  |  | nfactors = factor(n, fac); | 
| 874 | 600 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 875 | 347 |  |  |  |  |  | UV p = fac[i], pk = p-1; | 
| 876 | 394 | 100 |  |  |  |  | while (i+1 < nfactors && p == fac[i+1]) { | 
|  |  | 100 |  |  |  |  |  | 
| 877 | 47 |  |  |  |  |  | i++; | 
| 878 | 47 |  |  |  |  |  | pk *= p; | 
| 879 |  |  |  |  |  |  | } | 
| 880 | 347 |  |  |  |  |  | lambda = lcm_ui(lambda, pk); | 
| 881 |  |  |  |  |  |  | } | 
| 882 | 270 |  |  |  |  |  | return lambda; | 
| 883 |  |  |  |  |  |  | } | 
| 884 |  |  |  |  |  |  |  | 
| 885 | 20000 |  |  |  |  |  | int is_carmichael(UV n) { | 
| 886 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1]; | 
| 887 |  |  |  |  |  |  | UV exp[MPU_MAX_FACTORS+1]; | 
| 888 |  |  |  |  |  |  | int i, nfactors; | 
| 889 |  |  |  |  |  |  |  | 
| 890 |  |  |  |  |  |  | /* Small or even is not a Carmichael number */ | 
| 891 | 20000 | 100 |  |  |  |  | if (n < 561 || !(n&1)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 892 |  |  |  |  |  |  |  | 
| 893 |  |  |  |  |  |  | /* Simple pre-test for square free (odds only) */ | 
| 894 | 9720 | 100 |  |  |  |  | if (!(n% 9) || !(n%25) || !(n%49) || !(n%121) || !(n%169)) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 895 | 1709 |  |  |  |  |  | return 0; | 
| 896 |  |  |  |  |  |  |  | 
| 897 |  |  |  |  |  |  | /* Check Korselt's criterion for small divisors */ | 
| 898 | 8011 | 100 |  |  |  |  | if (!(n% 5) && ((n-1) %  4 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 899 | 7345 | 100 |  |  |  |  | if (!(n% 7) && ((n-1) %  6 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 900 | 6771 | 100 |  |  |  |  | if (!(n%11) && ((n-1) % 10 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 901 | 6333 | 100 |  |  |  |  | if (!(n%13) && ((n-1) % 12 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 902 | 5984 | 100 |  |  |  |  | if (!(n%17) && ((n-1) % 16 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 903 | 5678 | 100 |  |  |  |  | if (!(n%19) && ((n-1) % 18 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 904 | 5417 | 100 |  |  |  |  | if (!(n%23) && ((n-1) % 22 != 0)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 905 |  |  |  |  |  |  |  | 
| 906 |  |  |  |  |  |  | /* Fast check without having to factor */ | 
| 907 | 5197 | 50 |  |  |  |  | if (n > 5000000) { | 
| 908 | 0 | 0 |  |  |  |  | if (!(n%29) && ((n-1) % 28 != 0)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 909 | 0 | 0 |  |  |  |  | if (!(n%31) && ((n-1) % 30 != 0)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 910 | 0 | 0 |  |  |  |  | if (!(n%37) && ((n-1) % 36 != 0)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 911 | 0 | 0 |  |  |  |  | if (!(n%41) && ((n-1) % 40 != 0)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 912 | 0 | 0 |  |  |  |  | if (!(n%43) && ((n-1) % 42 != 0)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 913 | 0 | 0 |  |  |  |  | if (!is_pseudoprime(n,2)) return 0; | 
| 914 |  |  |  |  |  |  | } | 
| 915 |  |  |  |  |  |  |  | 
| 916 | 5197 |  |  |  |  |  | nfactors = factor_exp(n, fac, exp); | 
| 917 | 5197 | 100 |  |  |  |  | if (nfactors < 3) | 
| 918 | 4664 |  |  |  |  |  | return 0; | 
| 919 | 1330 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 920 | 1321 | 50 |  |  |  |  | if (exp[i] > 1  ||  ((n-1) % (fac[i]-1)) != 0) | 
|  |  | 100 |  |  |  |  |  | 
| 921 | 524 |  |  |  |  |  | return 0; | 
| 922 |  |  |  |  |  |  | } | 
| 923 | 20000 |  |  |  |  |  | return 1; | 
| 924 |  |  |  |  |  |  | } | 
| 925 |  |  |  |  |  |  |  | 
| 926 | 8230 |  |  |  |  |  | static int is_quasi_base(int nfactors, UV *fac, UV p, UV b) { | 
| 927 |  |  |  |  |  |  | int i; | 
| 928 | 13159 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 929 | 13015 |  |  |  |  |  | UV d = fac[i] - b; | 
| 930 | 13015 | 50 |  |  |  |  | if (d == 0 || (p % d) != 0) | 
|  |  | 100 |  |  |  |  |  | 
| 931 | 8086 |  |  |  |  |  | return 0; | 
| 932 |  |  |  |  |  |  | } | 
| 933 | 144 |  |  |  |  |  | return 1; | 
| 934 |  |  |  |  |  |  | } | 
| 935 |  |  |  |  |  |  |  | 
| 936 |  |  |  |  |  |  | /* Returns number of bases that pass */ | 
| 937 | 5402 |  |  |  |  |  | UV is_quasi_carmichael(UV n) { | 
| 938 |  |  |  |  |  |  | UV nbases, fac[MPU_MAX_FACTORS+1], exp[MPU_MAX_FACTORS+1]; | 
| 939 |  |  |  |  |  |  | UV spf, lpf, ndivisors, *divs; | 
| 940 |  |  |  |  |  |  | int i, nfactors; | 
| 941 |  |  |  |  |  |  |  | 
| 942 | 5402 | 100 |  |  |  |  | if (n < 35) return 0; | 
| 943 |  |  |  |  |  |  |  | 
| 944 |  |  |  |  |  |  | /* Simple pre-test for square free */ | 
| 945 | 5334 | 100 |  |  |  |  | if (!(n% 4) || !(n% 9) || !(n%25) || !(n%49) || !(n%121) || !(n%169)) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 946 | 2041 |  |  |  |  |  | return 0; | 
| 947 |  |  |  |  |  |  |  | 
| 948 | 3293 |  |  |  |  |  | nfactors = factor_exp(n, fac, exp); | 
| 949 |  |  |  |  |  |  | /* Must be composite */ | 
| 950 | 3293 | 100 |  |  |  |  | if (nfactors < 2) | 
| 951 | 741 |  |  |  |  |  | return 0; | 
| 952 |  |  |  |  |  |  | /* Must be square free */ | 
| 953 | 8889 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) | 
| 954 | 6371 | 100 |  |  |  |  | if (exp[i] > 1) | 
| 955 | 34 |  |  |  |  |  | return 0; | 
| 956 |  |  |  |  |  |  |  | 
| 957 | 2518 |  |  |  |  |  | nbases = 0; | 
| 958 | 2518 |  |  |  |  |  | spf = fac[0]; | 
| 959 | 2518 |  |  |  |  |  | lpf = fac[nfactors-1]; | 
| 960 |  |  |  |  |  |  |  | 
| 961 |  |  |  |  |  |  | /* Algorithm from Hiroaki Yamanouchi, 2015 */ | 
| 962 | 2518 | 100 |  |  |  |  | if (nfactors == 2) { | 
| 963 | 1448 |  |  |  |  |  | divs = _divisor_list(n / spf - 1, &ndivisors); | 
| 964 | 7401 | 50 |  |  |  |  | for (i = 0; i < (int)ndivisors; i++) { | 
| 965 | 5953 |  |  |  |  |  | UV d = divs[i]; | 
| 966 | 5953 |  |  |  |  |  | UV k = spf - d; | 
| 967 | 5953 | 100 |  |  |  |  | if (d >= spf) break; | 
| 968 | 4505 | 100 |  |  |  |  | if (is_quasi_base(nfactors, fac, n-k, k)) | 
| 969 | 92 |  |  |  |  |  | nbases++; | 
| 970 |  |  |  |  |  |  | } | 
| 971 |  |  |  |  |  |  | } else { | 
| 972 | 1070 |  |  |  |  |  | divs = _divisor_list(lpf * (n / lpf - 1), &ndivisors); | 
| 973 | 9523 | 100 |  |  |  |  | for (i = 0; i < (int)ndivisors; i++) { | 
| 974 | 8453 |  |  |  |  |  | UV d = divs[i]; | 
| 975 | 8453 |  |  |  |  |  | UV k = lpf - d; | 
| 976 | 8453 | 100 |  |  |  |  | if (lpf > d && k >= spf) continue; | 
|  |  | 100 |  |  |  |  |  | 
| 977 | 4795 | 100 |  |  |  |  | if (k != 0 && is_quasi_base(nfactors, fac, n-k, k)) | 
|  |  | 100 |  |  |  |  |  | 
| 978 | 52 |  |  |  |  |  | nbases++; | 
| 979 |  |  |  |  |  |  | } | 
| 980 |  |  |  |  |  |  | } | 
| 981 | 2518 |  |  |  |  |  | Safefree(divs); | 
| 982 | 5402 |  |  |  |  |  | return nbases; | 
| 983 |  |  |  |  |  |  | } | 
| 984 |  |  |  |  |  |  |  | 
| 985 | 103 |  |  |  |  |  | int is_semiprime(UV n) { | 
| 986 |  |  |  |  |  |  | UV sp, p, n3, factors[2]; | 
| 987 |  |  |  |  |  |  |  | 
| 988 | 103 | 50 |  |  |  |  | if (n < 6) return (n == 4); | 
| 989 | 103 | 100 |  |  |  |  | if (!(n&1)) return !!is_prob_prime(n>>1); | 
| 990 | 52 | 100 |  |  |  |  | if (!(n%3)) return !!is_prob_prime(n/3); | 
| 991 | 36 | 100 |  |  |  |  | if (!(n%5)) return !!is_prob_prime(n/5); | 
| 992 |  |  |  |  |  |  | /* 27% of random inputs left */ | 
| 993 | 30 |  |  |  |  |  | n3 = icbrt(n); | 
| 994 | 246 | 100 |  |  |  |  | for (sp = 4; sp < 60; sp++) { | 
| 995 | 244 |  |  |  |  |  | p = nth_prime(sp); | 
| 996 | 244 | 100 |  |  |  |  | if (p > n3) | 
| 997 | 18 |  |  |  |  |  | break; | 
| 998 | 226 | 100 |  |  |  |  | if ((n % p) == 0) | 
| 999 | 10 |  |  |  |  |  | return !!is_prob_prime(n/p); | 
| 1000 |  |  |  |  |  |  | } | 
| 1001 |  |  |  |  |  |  | /* 9.8% of random inputs left */ | 
| 1002 | 20 | 100 |  |  |  |  | if (is_def_prime(n)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 1003 | 9 | 100 |  |  |  |  | if (p > n3) return 1; | 
| 1004 |  |  |  |  |  |  | /* 4-8% of random inputs left */ | 
| 1005 |  |  |  |  |  |  | /* n is a composite and larger than p^3 */ | 
| 1006 | 2 | 50 |  |  |  |  | if (   pbrent_factor(n, factors, 90000, 1) == 2 | 
| 1007 |  |  |  |  |  |  | /* The only things we normally see by now are 16+ digit semiprimes */ | 
| 1008 | 0 | 0 |  |  |  |  | || pminus1_factor(n, factors, 4000, 4000) == 2 | 
| 1009 |  |  |  |  |  |  | /* 0.09% of random 64-bit inputs left */ | 
| 1010 | 0 | 0 |  |  |  |  | || pbrent_factor(n, factors, 180000, 7) == 2 ) | 
| 1011 | 2 | 50 |  |  |  |  | return (is_def_prime(factors[0]) && is_def_prime(factors[1])); | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1012 |  |  |  |  |  |  | /* 0.002% of random 64-bit inputs left */ | 
| 1013 |  |  |  |  |  |  | { | 
| 1014 |  |  |  |  |  |  | UV facs[MPU_MAX_FACTORS+1]; | 
| 1015 | 103 |  |  |  |  |  | return (factor(n,facs) == 2); | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 |  |  |  |  |  |  |  | 
| 1019 | 102 |  |  |  |  |  | int is_fundamental(UV n, int neg) { | 
| 1020 | 102 |  |  |  |  |  | UV r = n & 15; | 
| 1021 | 102 | 100 |  |  |  |  | if (r) { | 
| 1022 | 94 | 100 |  |  |  |  | if (!neg) { | 
| 1023 | 47 |  |  |  |  |  | switch (r & 3) { | 
| 1024 | 9 | 100 |  |  |  |  | case 0:  return (r ==  4) ? 0 : is_square_free(n >> 2); | 
|  |  | 50 |  |  |  |  |  | 
| 1025 | 13 |  |  |  |  |  | case 1:  return is_square_free(n); | 
| 1026 | 25 |  |  |  |  |  | default: break; | 
| 1027 |  |  |  |  |  |  | } | 
| 1028 |  |  |  |  |  |  | } else { | 
| 1029 | 47 |  |  |  |  |  | switch (r & 3) { | 
| 1030 | 9 | 100 |  |  |  |  | case 0:  return (r == 12) ? 0 : is_square_free(n >> 2); | 
|  |  | 100 |  |  |  |  |  | 
| 1031 | 12 |  |  |  |  |  | case 3:  return is_square_free(n); | 
| 1032 | 26 |  |  |  |  |  | default: break; | 
| 1033 |  |  |  |  |  |  | } | 
| 1034 |  |  |  |  |  |  | } | 
| 1035 |  |  |  |  |  |  | } | 
| 1036 | 59 |  |  |  |  |  | return 0; | 
| 1037 |  |  |  |  |  |  | } | 
| 1038 |  |  |  |  |  |  |  | 
| 1039 | 461 |  |  |  |  |  | static int _totpred(UV n, UV maxd) { | 
| 1040 |  |  |  |  |  |  | UV i, ndivisors, *divs; | 
| 1041 |  |  |  |  |  |  | int res; | 
| 1042 |  |  |  |  |  |  |  | 
| 1043 | 461 | 100 |  |  |  |  | if (n & 1) return 0; | 
| 1044 | 255 |  |  |  |  |  | n >>= 1; | 
| 1045 | 255 | 100 |  |  |  |  | if (n == 1) return 1; | 
| 1046 | 249 | 100 |  |  |  |  | if (n < maxd && is_prime(2*n+1)) return 1; | 
|  |  | 100 |  |  |  |  |  | 
| 1047 |  |  |  |  |  |  |  | 
| 1048 | 230 |  |  |  |  |  | divs = _divisor_list(n, &ndivisors); | 
| 1049 | 1086 | 100 |  |  |  |  | for (i = 0, res = 0; i < ndivisors && divs[i] < maxd && res == 0; i++) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1050 | 856 |  |  |  |  |  | UV r, d = divs[i], p = 2*d+1; | 
| 1051 | 856 | 100 |  |  |  |  | if (!is_prime(p)) continue; | 
| 1052 | 349 |  |  |  |  |  | r = n/d; | 
| 1053 |  |  |  |  |  |  | while (1) { | 
| 1054 | 401 | 100 |  |  |  |  | if (r == p || _totpred(r, d)) { res = 1; break; } | 
|  |  | 100 |  |  |  |  |  | 
| 1055 | 385 | 100 |  |  |  |  | if (r % p) break; | 
| 1056 | 52 |  |  |  |  |  | r /= p; | 
| 1057 | 52 |  |  |  |  |  | } | 
| 1058 |  |  |  |  |  |  | } | 
| 1059 | 230 |  |  |  |  |  | Safefree(divs); | 
| 1060 | 461 |  |  |  |  |  | return res; | 
| 1061 |  |  |  |  |  |  | } | 
| 1062 |  |  |  |  |  |  |  | 
| 1063 | 124 |  |  |  |  |  | int is_totient(UV n) { | 
| 1064 | 124 | 100 |  |  |  |  | return (n == 0 || (n & 1))  ?  (n==1)  :  _totpred(n,n); | 
|  |  | 100 |  |  |  |  |  | 
| 1065 |  |  |  |  |  |  | } | 
| 1066 |  |  |  |  |  |  |  | 
| 1067 | 0 |  |  |  |  |  | UV pillai_v(UV n) { | 
| 1068 | 0 |  |  |  |  |  | UV v, fac = 5040 % n; | 
| 1069 | 0 | 0 |  |  |  |  | if (n == 0) return 0; | 
| 1070 | 0 | 0 |  |  |  |  | for (v = 8; v < n-1 && fac != 0; v++) { | 
|  |  | 0 |  |  |  |  |  | 
| 1071 | 0 | 0 |  |  |  |  | fac = (n < HALF_WORD) ? (fac*v) % n : mulmod(fac,v,n); | 
| 1072 | 0 | 0 |  |  |  |  | if (fac == n-1 && (n % v) != 1) | 
|  |  | 0 |  |  |  |  |  | 
| 1073 | 0 |  |  |  |  |  | return v; | 
| 1074 |  |  |  |  |  |  | } | 
| 1075 | 0 |  |  |  |  |  | return 0; | 
| 1076 |  |  |  |  |  |  | } | 
| 1077 |  |  |  |  |  |  |  | 
| 1078 |  |  |  |  |  |  |  | 
| 1079 | 29061 |  |  |  |  |  | int moebius(UV n) { | 
| 1080 |  |  |  |  |  |  | UV factors[MPU_MAX_FACTORS+1]; | 
| 1081 |  |  |  |  |  |  | UV i, nfactors; | 
| 1082 |  |  |  |  |  |  |  | 
| 1083 | 29061 | 100 |  |  |  |  | if (n <= 1) return (int)n; | 
| 1084 | 28800 | 100 |  |  |  |  | if ( n >= 49 && (!(n% 4) || !(n% 9) || !(n%25) || !(n%49)) ) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1085 | 10070 |  |  |  |  |  | return 0; | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 | 18730 |  |  |  |  |  | nfactors = factor(n, factors); | 
| 1088 | 39219 | 100 |  |  |  |  | for (i = 1; i < nfactors; i++) | 
| 1089 | 21568 | 100 |  |  |  |  | if (factors[i] == factors[i-1]) | 
| 1090 | 1079 |  |  |  |  |  | return 0; | 
| 1091 | 29061 | 100 |  |  |  |  | return (nfactors % 2) ? -1 : 1; | 
| 1092 |  |  |  |  |  |  | } | 
| 1093 |  |  |  |  |  |  |  | 
| 1094 | 20 |  |  |  |  |  | UV exp_mangoldt(UV n) { | 
| 1095 |  |  |  |  |  |  | UV p; | 
| 1096 | 20 | 100 |  |  |  |  | if (!primepower(n,&p)) return 1;     /* Not a prime power */ | 
| 1097 | 20 |  |  |  |  |  | return p; | 
| 1098 |  |  |  |  |  |  | } | 
| 1099 |  |  |  |  |  |  |  | 
| 1100 |  |  |  |  |  |  |  | 
| 1101 | 75 |  |  |  |  |  | UV znorder(UV a, UV n) { | 
| 1102 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1]; | 
| 1103 |  |  |  |  |  |  | UV exp[MPU_MAX_FACTORS+1]; | 
| 1104 |  |  |  |  |  |  | int i, nfactors; | 
| 1105 |  |  |  |  |  |  | UV k, phi; | 
| 1106 |  |  |  |  |  |  |  | 
| 1107 | 75 | 100 |  |  |  |  | if (n <= 1) return n;   /* znorder(x,0) = 0, znorder(x,1) = 1          */ | 
| 1108 | 69 | 100 |  |  |  |  | if (a <= 1) return a;   /* znorder(0,x) = 0, znorder(1,x) = 1  (x > 1) */ | 
| 1109 | 66 | 100 |  |  |  |  | if (gcd_ui(a,n) > 1)  return 0; | 
| 1110 |  |  |  |  |  |  |  | 
| 1111 |  |  |  |  |  |  | /* Cohen 1.4.3 using Carmichael Lambda */ | 
| 1112 | 60 |  |  |  |  |  | phi = carmichael_lambda(n); | 
| 1113 | 60 |  |  |  |  |  | nfactors = factor_exp(phi, fac, exp); | 
| 1114 | 60 |  |  |  |  |  | k = phi; | 
| 1115 | 279 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 1116 | 219 |  |  |  |  |  | UV b, a1, ek, pi = fac[i], ei = exp[i]; | 
| 1117 | 219 |  |  |  |  |  | b = ipow(pi,ei); | 
| 1118 | 219 |  |  |  |  |  | k /= b; | 
| 1119 | 219 |  |  |  |  |  | a1 = powmod(a, k, n); | 
| 1120 | 387 | 100 |  |  |  |  | for (ek = 0; a1 != 1 && ek++ <= ei; a1 = powmod(a1, pi, n)) | 
|  |  | 50 |  |  |  |  |  | 
| 1121 | 168 |  |  |  |  |  | k *= pi; | 
| 1122 | 219 | 50 |  |  |  |  | if (ek > ei) return 0; | 
| 1123 |  |  |  |  |  |  | } | 
| 1124 | 75 |  |  |  |  |  | return k; | 
| 1125 |  |  |  |  |  |  | } | 
| 1126 |  |  |  |  |  |  |  | 
| 1127 | 25 |  |  |  |  |  | UV znprimroot(UV n) { | 
| 1128 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1]; | 
| 1129 |  |  |  |  |  |  | UV exp[MPU_MAX_FACTORS+1]; | 
| 1130 |  |  |  |  |  |  | UV a, phi, on, r; | 
| 1131 |  |  |  |  |  |  | int i, nfactors; | 
| 1132 |  |  |  |  |  |  |  | 
| 1133 | 25 | 100 |  |  |  |  | if (n <= 4) return (n == 0) ? 0 : n-1; | 
|  |  | 100 |  |  |  |  |  | 
| 1134 | 20 | 100 |  |  |  |  | if (n % 4 == 0)  return 0; | 
| 1135 |  |  |  |  |  |  |  | 
| 1136 | 19 | 100 |  |  |  |  | on = (n&1) ? n : (n>>1); | 
| 1137 | 19 |  |  |  |  |  | a = powerof(on); | 
| 1138 | 19 |  |  |  |  |  | r = rootof(on, a); | 
| 1139 | 19 | 100 |  |  |  |  | if (!is_prob_prime(r)) return 0;        /* c^a or 2c^a */ | 
| 1140 | 17 |  |  |  |  |  | phi = (r-1) * (on/r);                   /* p^a or 2p^a */ | 
| 1141 |  |  |  |  |  |  |  | 
| 1142 | 17 |  |  |  |  |  | nfactors = factor_exp(phi, fac, exp); | 
| 1143 | 83 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) | 
| 1144 | 66 |  |  |  |  |  | exp[i] = phi / fac[i];  /* exp[i] = phi(n) / i-th-factor-of-phi(n) */ | 
| 1145 | 870 | 50 |  |  |  |  | for (a = 2; a < n; a++) { | 
| 1146 |  |  |  |  |  |  | /* Skip first few perfect powers */ | 
| 1147 | 870 | 100 |  |  |  |  | if (a == 4 || a == 8 || a == 9) continue; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1148 |  |  |  |  |  |  | /* Skip values we know can't be right: (a|n) = 0 (or 1 for odd primes) */ | 
| 1149 | 845 | 100 |  |  |  |  | if (phi == n-1) { | 
| 1150 | 812 | 100 |  |  |  |  | if (kronecker_uu(a, n) != -1)  continue; | 
| 1151 |  |  |  |  |  |  | } else { | 
| 1152 | 33 | 100 |  |  |  |  | if (kronecker_uu(a, n) == 0)  continue; | 
| 1153 |  |  |  |  |  |  | } | 
| 1154 | 459 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) | 
| 1155 | 442 | 100 |  |  |  |  | if (powmod(a, exp[i], n) == 1) | 
| 1156 | 159 |  |  |  |  |  | break; | 
| 1157 | 176 | 100 |  |  |  |  | if (i == nfactors) return a; | 
| 1158 |  |  |  |  |  |  | } | 
| 1159 | 25 |  |  |  |  |  | return 0; | 
| 1160 |  |  |  |  |  |  | } | 
| 1161 |  |  |  |  |  |  |  | 
| 1162 | 47 |  |  |  |  |  | int is_primitive_root(UV a, UV n, int nprime) { | 
| 1163 |  |  |  |  |  |  | UV s, fac[MPU_MAX_FACTORS+1]; | 
| 1164 |  |  |  |  |  |  | int i, nfacs; | 
| 1165 | 47 | 100 |  |  |  |  | if (n <= 1) return n; | 
| 1166 | 45 | 100 |  |  |  |  | if (a >= n) a %= n; | 
| 1167 | 45 | 100 |  |  |  |  | if (gcd_ui(a,n) != 1) return 0; | 
| 1168 | 43 | 100 |  |  |  |  | s = nprime ? n-1 : totient(n); | 
| 1169 |  |  |  |  |  |  |  | 
| 1170 |  |  |  |  |  |  | /* a^x can be a primitive root only if gcd(x,s) = 1 */ | 
| 1171 | 43 |  |  |  |  |  | i = is_power(a,0); | 
| 1172 | 43 | 100 |  |  |  |  | if (i > 1 && gcd_ui(i, s) != 1) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 1173 |  |  |  |  |  |  |  | 
| 1174 |  |  |  |  |  |  | /* Quick check for small factors before full factor */ | 
| 1175 | 39 | 100 |  |  |  |  | if ((s % 2) == 0 && powmod(a, s/2, n) == 1) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 1176 |  |  |  |  |  |  |  | 
| 1177 |  |  |  |  |  |  | #if USE_MONTMATH | 
| 1178 | 30 | 100 |  |  |  |  | if (n & 1) { | 
| 1179 | 25 |  |  |  |  |  | const uint64_t npi = mont_inverse(n),  mont1 = mont_get1(n); | 
| 1180 | 25 |  |  |  |  |  | a = mont_geta(a, n); | 
| 1181 | 25 | 100 |  |  |  |  | if ((s % 3) == 0 && mont_powmod(a, s/3, n) == mont1) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 1182 | 24 | 100 |  |  |  |  | if ((s % 5) == 0 && mont_powmod(a, s/5, n) == mont1) return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 1183 | 24 |  |  |  |  |  | nfacs = factor_exp(s, fac, 0); | 
| 1184 | 101 | 100 |  |  |  |  | for (i = 0; i < nfacs; i++) { | 
| 1185 | 77 | 100 |  |  |  |  | if (fac[i] > 5 && mont_powmod(a, s/fac[i], n) == mont1) return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 1186 |  |  |  |  |  |  | } | 
| 1187 |  |  |  |  |  |  | } else | 
| 1188 |  |  |  |  |  |  | #endif | 
| 1189 |  |  |  |  |  |  | { | 
| 1190 | 5 | 50 |  |  |  |  | if ((s % 3) == 0 && powmod(a, s/3, n) == 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1191 | 5 | 100 |  |  |  |  | if ((s % 5) == 0 && powmod(a, s/5, n) == 1) return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 1192 |  |  |  |  |  |  | /* Complete factor and check each one not found above. */ | 
| 1193 | 4 |  |  |  |  |  | nfacs = factor_exp(s, fac, 0); | 
| 1194 | 7 | 100 |  |  |  |  | for (i = 0; i < nfacs; i++) { | 
| 1195 | 3 | 50 |  |  |  |  | if (fac[i] > 5 && powmod(a, s/fac[i], n) == 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1196 |  |  |  |  |  |  | } | 
| 1197 |  |  |  |  |  |  | } | 
| 1198 | 47 |  |  |  |  |  | return 1; | 
| 1199 |  |  |  |  |  |  | } | 
| 1200 |  |  |  |  |  |  |  | 
| 1201 | 50 |  |  |  |  |  | IV gcdext(IV a, IV b, IV* u, IV* v, IV* cs, IV* ct) { | 
| 1202 | 50 |  |  |  |  |  | IV s = 0;  IV os = 1; | 
| 1203 | 50 |  |  |  |  |  | IV t = 1;  IV ot = 0; | 
| 1204 | 50 |  |  |  |  |  | IV r = b;  IV or = a; | 
| 1205 | 50 | 100 |  |  |  |  | if (a == 0 && b == 0) { os = 0; t = 0; } | 
|  |  | 100 |  |  |  |  |  | 
| 1206 | 335 | 100 |  |  |  |  | while (r != 0) { | 
| 1207 | 285 |  |  |  |  |  | IV quot = or / r; | 
| 1208 | 285 |  |  |  |  |  | { IV tmp = r; r = or - quot * r;  or = tmp; } | 
| 1209 | 285 |  |  |  |  |  | { IV tmp = s; s = os - quot * s;  os = tmp; } | 
| 1210 | 285 |  |  |  |  |  | { IV tmp = t; t = ot - quot * t;  ot = tmp; } | 
| 1211 |  |  |  |  |  |  | } | 
| 1212 | 50 | 100 |  |  |  |  | if (or < 0) /* correct sign */ | 
| 1213 | 4 |  |  |  |  |  | { or = -or; os = -os; ot = -ot; } | 
| 1214 | 50 | 50 |  |  |  |  | if (u  != 0) *u = os; | 
| 1215 | 50 | 50 |  |  |  |  | if (v  != 0) *v = ot; | 
| 1216 | 50 | 100 |  |  |  |  | if (cs != 0) *cs = s; | 
| 1217 | 50 | 100 |  |  |  |  | if (ct != 0) *ct = t; | 
| 1218 | 50 |  |  |  |  |  | return or; | 
| 1219 |  |  |  |  |  |  | } | 
| 1220 |  |  |  |  |  |  |  | 
| 1221 |  |  |  |  |  |  | /* Calculate 1/a mod n. */ | 
| 1222 | 161 |  |  |  |  |  | UV modinverse(UV a, UV n) { | 
| 1223 | 161 |  |  |  |  |  | IV t = 0;  UV nt = 1; | 
| 1224 | 161 |  |  |  |  |  | UV r = n;  UV nr = a; | 
| 1225 | 3792 | 100 |  |  |  |  | while (nr != 0) { | 
| 1226 | 3631 |  |  |  |  |  | UV quot = r / nr; | 
| 1227 | 3631 |  |  |  |  |  | { UV tmp = nt;  nt = t - quot*nt;  t = tmp; } | 
| 1228 | 3631 |  |  |  |  |  | { UV tmp = nr;  nr = r - quot*nr;  r = tmp; } | 
| 1229 |  |  |  |  |  |  | } | 
| 1230 | 161 | 100 |  |  |  |  | if (r > 1) return 0;  /* No inverse */ | 
| 1231 | 132 | 100 |  |  |  |  | if (t < 0) t += n; | 
| 1232 | 132 |  |  |  |  |  | return t; | 
| 1233 |  |  |  |  |  |  | } | 
| 1234 |  |  |  |  |  |  |  | 
| 1235 | 2 |  |  |  |  |  | UV divmod(UV a, UV b, UV n) {   /* a / b  mod n */ | 
| 1236 | 2 |  |  |  |  |  | UV binv = modinverse(b, n); | 
| 1237 | 2 | 50 |  |  |  |  | if (binv == 0)  return 0; | 
| 1238 | 2 |  |  |  |  |  | return mulmod(a, binv, n); | 
| 1239 |  |  |  |  |  |  | } | 
| 1240 |  |  |  |  |  |  |  | 
| 1241 | 183072 |  |  |  |  |  | static UV _powfactor(UV p, UV d, UV m) { | 
| 1242 | 183072 |  |  |  |  |  | UV e = 0; | 
| 1243 | 366571 | 100 |  |  |  |  | do { d /= p; e += d; } while (d > 0); | 
| 1244 | 183072 |  |  |  |  |  | return powmod(p, e, m); | 
| 1245 |  |  |  |  |  |  | } | 
| 1246 |  |  |  |  |  |  |  | 
| 1247 | 821 |  |  |  |  |  | UV factorialmod(UV n, UV m) {  /*  n! mod m */ | 
| 1248 | 821 |  |  |  |  |  | UV i, d = n, res = 1; | 
| 1249 |  |  |  |  |  |  |  | 
| 1250 | 821 | 50 |  |  |  |  | if (n >= m || m == 1) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 1251 |  |  |  |  |  |  |  | 
| 1252 | 820 | 100 |  |  |  |  | if (n <= 10) { /* Keep things simple for small n */ | 
| 1253 | 1672 | 100 |  |  |  |  | for (i = 2; i <= n && res != 0; i++) | 
|  |  | 100 |  |  |  |  |  | 
| 1254 | 1288 |  |  |  |  |  | res = (res * i) % m; | 
| 1255 | 384 |  |  |  |  |  | return res; | 
| 1256 |  |  |  |  |  |  | } | 
| 1257 |  |  |  |  |  |  |  | 
| 1258 | 436 | 100 |  |  |  |  | if (n > m/2 && is_prime(m))    /* Check if we can go backwards */ | 
|  |  | 100 |  |  |  |  |  | 
| 1259 | 74 |  |  |  |  |  | d = m-n-1; | 
| 1260 | 436 | 100 |  |  |  |  | if (d < 2) | 
| 1261 | 14 | 100 |  |  |  |  | return (d == 0) ? m-1 : 1;   /* Wilson's Theorem: n = m-1 and n = m-2 */ | 
| 1262 |  |  |  |  |  |  |  | 
| 1263 | 422 | 100 |  |  |  |  | if (d == n && d > 5000000) {   /* Check for composite m that leads to 0 */ | 
|  |  | 100 |  |  |  |  |  | 
| 1264 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1], exp[MPU_MAX_FACTORS+1]; | 
| 1265 | 1 |  |  |  |  |  | int j, k, nfacs = factor_exp(m, fac, exp); | 
| 1266 | 2 | 100 |  |  |  |  | for (j = 0; j < nfacs; j++) { | 
| 1267 | 1 |  |  |  |  |  | UV t = fac[j]; | 
| 1268 | 3 | 100 |  |  |  |  | for (k = 1; (UV)k < exp[j]; k++) | 
| 1269 | 2 |  |  |  |  |  | t *= fac[j]; | 
| 1270 | 1 | 50 |  |  |  |  | if (n >= t) return 0; | 
| 1271 |  |  |  |  |  |  | } | 
| 1272 |  |  |  |  |  |  | } | 
| 1273 |  |  |  |  |  |  |  | 
| 1274 |  |  |  |  |  |  | #if USE_MONTMATH | 
| 1275 | 618 | 100 |  |  |  |  | if (m & 1 && d < 40000) { | 
|  |  | 100 |  |  |  |  |  | 
| 1276 | 196 |  |  |  |  |  | const uint64_t npi = mont_inverse(m),  mont1 = mont_get1(m); | 
| 1277 | 196 |  |  |  |  |  | uint64_t monti = mont1; | 
| 1278 | 196 |  |  |  |  |  | res = mont1; | 
| 1279 | 1828 | 100 |  |  |  |  | for (i = 2; i <= d && res != 0; i++) { | 
|  |  | 100 |  |  |  |  |  | 
| 1280 | 1632 |  |  |  |  |  | monti = addmod(monti,mont1,m); | 
| 1281 | 1632 | 50 |  |  |  |  | res = mont_mulmod(res,monti,m); | 
| 1282 |  |  |  |  |  |  | } | 
| 1283 | 196 | 50 |  |  |  |  | res = mont_recover(res, m); | 
| 1284 |  |  |  |  |  |  | } else | 
| 1285 |  |  |  |  |  |  | #endif | 
| 1286 | 226 | 100 |  |  |  |  | if (d < 10000) { | 
| 1287 | 2031 | 100 |  |  |  |  | for (i = 2; i <= d && res != 0; i++) | 
|  |  | 100 |  |  |  |  |  | 
| 1288 | 1806 |  |  |  |  |  | res = mulmod(res,i,m); | 
| 1289 |  |  |  |  |  |  | } else { | 
| 1290 |  |  |  |  |  |  | #if 0    /* Monolithic prime walk */ | 
| 1291 |  |  |  |  |  |  | START_DO_FOR_EACH_PRIME(2, d) { | 
| 1292 |  |  |  |  |  |  | UV k = (p > (d>>1))  ?  p  :  _powfactor(p, d, m); | 
| 1293 |  |  |  |  |  |  | res = mulmod(res, k, m); | 
| 1294 |  |  |  |  |  |  | if (res == 0) break; | 
| 1295 |  |  |  |  |  |  | } END_DO_FOR_EACH_PRIME; | 
| 1296 |  |  |  |  |  |  | #else    /* Segmented prime walk */ | 
| 1297 |  |  |  |  |  |  | unsigned char* segment; | 
| 1298 |  |  |  |  |  |  | UV seg_base, seg_low, seg_high; | 
| 1299 | 1 |  |  |  |  |  | void* ctx = start_segment_primes(7, d, &segment); | 
| 1300 | 4 | 100 |  |  |  |  | for (i = 1; i <= 3; i++)    /* Handle 2,3,5 assume d>10*/ | 
| 1301 | 3 |  |  |  |  |  | res = mulmod(res, _powfactor(2*i - (i>1), d, m), m); | 
| 1302 | 7 | 50 |  |  |  |  | while (res != 0 && next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { | 
|  |  | 100 |  |  |  |  |  | 
| 1303 | 369350 | 50 |  |  |  |  | START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1304 | 348510 | 100 |  |  |  |  | UV k = (p > (d>>1))  ?  p  :  _powfactor(p, d, m); | 
| 1305 | 348510 |  |  |  |  |  | res = mulmod(res, k, m); | 
| 1306 | 348510 | 50 |  |  |  |  | if (res == 0) break; | 
| 1307 | 20834 |  |  |  |  |  | END_DO_FOR_EACH_SIEVE_PRIME | 
| 1308 |  |  |  |  |  |  | } | 
| 1309 | 1 |  |  |  |  |  | end_segment_primes(ctx); | 
| 1310 |  |  |  |  |  |  | #endif | 
| 1311 |  |  |  |  |  |  | } | 
| 1312 |  |  |  |  |  |  |  | 
| 1313 | 422 | 100 |  |  |  |  | if (d != n && res != 0) {      /* Handle backwards case */ | 
|  |  | 50 |  |  |  |  |  | 
| 1314 | 60 | 100 |  |  |  |  | if (!(d&1)) res = submod(m,res,m); | 
| 1315 | 60 |  |  |  |  |  | res = modinverse(res,m); | 
| 1316 |  |  |  |  |  |  | } | 
| 1317 |  |  |  |  |  |  |  | 
| 1318 | 422 |  |  |  |  |  | return res; | 
| 1319 |  |  |  |  |  |  | } | 
| 1320 |  |  |  |  |  |  |  | 
| 1321 | 15 |  |  |  |  |  | static int verify_sqrtmod(UV s, UV *rs, UV a, UV p) { | 
| 1322 | 15 | 100 |  |  |  |  | if (p-s < s)  s = p-s; | 
| 1323 | 15 | 50 |  |  |  |  | if (mulmod(s, s, p) != a) return 0; | 
| 1324 | 15 |  |  |  |  |  | *rs = s; | 
| 1325 | 15 |  |  |  |  |  | return 1; | 
| 1326 |  |  |  |  |  |  | } | 
| 1327 |  |  |  |  |  |  | #if !USE_MONTMATH | 
| 1328 |  |  |  |  |  |  | UV _sqrtmod_prime(UV a, UV p) { | 
| 1329 |  |  |  |  |  |  | if ((p % 4) == 3) { | 
| 1330 |  |  |  |  |  |  | return powmod(a, (p+1)>>2, p); | 
| 1331 |  |  |  |  |  |  | } | 
| 1332 |  |  |  |  |  |  | if ((p % 8) == 5) { /* Atkin's algorithm.  Faster than Legendre. */ | 
| 1333 |  |  |  |  |  |  | UV a2, alpha, beta, b; | 
| 1334 |  |  |  |  |  |  | a2 = addmod(a,a,p); | 
| 1335 |  |  |  |  |  |  | alpha = powmod(a2,(p-5)>>3,p); | 
| 1336 |  |  |  |  |  |  | beta  = mulmod(a2,sqrmod(alpha,p),p); | 
| 1337 |  |  |  |  |  |  | b     = mulmod(alpha, mulmod(a, (beta ? beta-1 : p-1), p), p); | 
| 1338 |  |  |  |  |  |  | return b; | 
| 1339 |  |  |  |  |  |  | } | 
| 1340 |  |  |  |  |  |  | if ((p % 16) == 9) { /* Müller's algorithm extending Atkin */ | 
| 1341 |  |  |  |  |  |  | UV a2, alpha, beta, b, d = 1; | 
| 1342 |  |  |  |  |  |  | a2 = addmod(a,a,p); | 
| 1343 |  |  |  |  |  |  | alpha = powmod(a2, (p-9)>>4, p); | 
| 1344 |  |  |  |  |  |  | beta  = mulmod(a2, sqrmod(alpha,p), p); | 
| 1345 |  |  |  |  |  |  | if (sqrmod(beta,p) != p-1) { | 
| 1346 |  |  |  |  |  |  | do { d += 2; } while (kronecker_uu(d,p) != -1 && d < p); | 
| 1347 |  |  |  |  |  |  | alpha = mulmod(alpha, powmod(d,(p-9)>>3,p), p); | 
| 1348 |  |  |  |  |  |  | beta  = mulmod(a2, mulmod(sqrmod(d,p),sqrmod(alpha,p),p), p); | 
| 1349 |  |  |  |  |  |  | } | 
| 1350 |  |  |  |  |  |  | b = mulmod(alpha, mulmod(a, mulmod(d,(beta ? beta-1 : p-1),p),p),p); | 
| 1351 |  |  |  |  |  |  | return b; | 
| 1352 |  |  |  |  |  |  | } | 
| 1353 |  |  |  |  |  |  |  | 
| 1354 |  |  |  |  |  |  | /* Verify Euler condition for odd p */ | 
| 1355 |  |  |  |  |  |  | if ((p & 1) && powmod(a,(p-1)>>1,p) != 1) return 0; | 
| 1356 |  |  |  |  |  |  |  | 
| 1357 |  |  |  |  |  |  | { | 
| 1358 |  |  |  |  |  |  | UV x, q, e, t, z, r, m, b; | 
| 1359 |  |  |  |  |  |  | q = p-1; | 
| 1360 |  |  |  |  |  |  | e = valuation(q, 2); | 
| 1361 |  |  |  |  |  |  | q >>= e; | 
| 1362 |  |  |  |  |  |  | t = 3; | 
| 1363 |  |  |  |  |  |  | while (kronecker_uu(t, p) != -1) { | 
| 1364 |  |  |  |  |  |  | t += 2; | 
| 1365 |  |  |  |  |  |  | if (t == 201) {           /* exit if p looks like a composite */ | 
| 1366 |  |  |  |  |  |  | if ((p % 2) == 0 || powmod(2, p-1, p) != 1 || powmod(3, p-1, p) != 1) | 
| 1367 |  |  |  |  |  |  | return 0; | 
| 1368 |  |  |  |  |  |  | } else if (t >= 20000) {  /* should never happen */ | 
| 1369 |  |  |  |  |  |  | return 0; | 
| 1370 |  |  |  |  |  |  | } | 
| 1371 |  |  |  |  |  |  | } | 
| 1372 |  |  |  |  |  |  | z = powmod(t, q, p); | 
| 1373 |  |  |  |  |  |  | b = powmod(a, q, p); | 
| 1374 |  |  |  |  |  |  | r = e; | 
| 1375 |  |  |  |  |  |  | q = (q+1) >> 1; | 
| 1376 |  |  |  |  |  |  | x = powmod(a, q, p); | 
| 1377 |  |  |  |  |  |  | while (b != 1) { | 
| 1378 |  |  |  |  |  |  | t = b; | 
| 1379 |  |  |  |  |  |  | for (m = 0; m < r && t != 1; m++) | 
| 1380 |  |  |  |  |  |  | t = sqrmod(t, p); | 
| 1381 |  |  |  |  |  |  | if (m >= r) break; | 
| 1382 |  |  |  |  |  |  | t = powmod(z, UVCONST(1) << (r-m-1), p); | 
| 1383 |  |  |  |  |  |  | x = mulmod(x, t, p); | 
| 1384 |  |  |  |  |  |  | z = mulmod(t, t, p); | 
| 1385 |  |  |  |  |  |  | b = mulmod(b, z, p); | 
| 1386 |  |  |  |  |  |  | r = m; | 
| 1387 |  |  |  |  |  |  | } | 
| 1388 |  |  |  |  |  |  | return x; | 
| 1389 |  |  |  |  |  |  | } | 
| 1390 |  |  |  |  |  |  | return 0; | 
| 1391 |  |  |  |  |  |  | } | 
| 1392 |  |  |  |  |  |  | #else | 
| 1393 | 8 |  |  |  |  |  | UV _sqrtmod_prime(UV a, UV p) { | 
| 1394 | 8 |  |  |  |  |  | const uint64_t npi = mont_inverse(p),  mont1 = mont_get1(p); | 
| 1395 | 8 |  |  |  |  |  | a = mont_geta(a,p); | 
| 1396 |  |  |  |  |  |  |  | 
| 1397 | 8 | 100 |  |  |  |  | if ((p % 4) == 3) { | 
| 1398 | 1 |  |  |  |  |  | UV b = mont_powmod(a, (p+1)>>2, p); | 
| 1399 | 1 | 50 |  |  |  |  | return mont_recover(b, p); | 
| 1400 |  |  |  |  |  |  | } | 
| 1401 |  |  |  |  |  |  |  | 
| 1402 | 7 | 100 |  |  |  |  | if ((p % 8) == 5) { /* Atkin's algorithm.  Faster than Legendre. */ | 
| 1403 |  |  |  |  |  |  | UV a2, alpha, beta, b; | 
| 1404 | 5 |  |  |  |  |  | a2 = addmod(a,a,p); | 
| 1405 | 5 |  |  |  |  |  | alpha = mont_powmod(a2,(p-5)>>3,p); | 
| 1406 | 5 | 50 |  |  |  |  | beta  = mont_mulmod(a2,mont_sqrmod(alpha,p),p); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1407 | 5 |  |  |  |  |  | beta  = submod(beta, mont1, p); | 
| 1408 | 5 | 50 |  |  |  |  | b     = mont_mulmod(alpha, mont_mulmod(a, beta, p), p); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1409 | 5 | 50 |  |  |  |  | return mont_recover(b, p); | 
| 1410 |  |  |  |  |  |  | } | 
| 1411 | 2 | 100 |  |  |  |  | if ((p % 16) == 9) { /* Müller's algorithm extending Atkin */ | 
| 1412 | 1 |  |  |  |  |  | UV a2, alpha, beta, b, d = 1; | 
| 1413 | 1 |  |  |  |  |  | a2 = addmod(a,a,p); | 
| 1414 | 1 |  |  |  |  |  | alpha = mont_powmod(a2, (p-9)>>4, p); | 
| 1415 | 1 | 50 |  |  |  |  | beta  = mont_mulmod(a2, mont_sqrmod(alpha,p), p); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1416 | 1 | 50 |  |  |  |  | if (mont_sqrmod(beta,p) != submod(0,mont1,p)) { | 
|  |  | 50 |  |  |  |  |  | 
| 1417 | 5 | 100 |  |  |  |  | do { d += 2; } while (kronecker_uu(d,p) != -1 && d < p); | 
|  |  | 50 |  |  |  |  |  | 
| 1418 | 1 |  |  |  |  |  | d = mont_geta(d,p); | 
| 1419 | 1 | 50 |  |  |  |  | alpha = mont_mulmod(alpha, mont_powmod(d,(p-9)>>3,p), p); | 
| 1420 | 1 | 50 |  |  |  |  | beta  = mont_mulmod(a2, mont_mulmod(mont_sqrmod(d,p),mont_sqrmod(alpha,p),p), p); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1421 | 1 | 50 |  |  |  |  | beta  = mont_mulmod(submod(beta,mont1,p), d, p); | 
| 1422 |  |  |  |  |  |  | } else { | 
| 1423 | 0 |  |  |  |  |  | beta  = submod(beta, mont1, p); | 
| 1424 |  |  |  |  |  |  | } | 
| 1425 | 1 | 50 |  |  |  |  | b = mont_mulmod(alpha, mont_mulmod(a, beta, p), p); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1426 | 1 | 50 |  |  |  |  | return mont_recover(b, p); | 
| 1427 |  |  |  |  |  |  | } | 
| 1428 |  |  |  |  |  |  |  | 
| 1429 |  |  |  |  |  |  | /* Verify Euler condition for odd p */ | 
| 1430 | 1 | 50 |  |  |  |  | if ((p & 1) && mont_powmod(a,(p-1)>>1,p) != mont1) return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 1431 |  |  |  |  |  |  |  | 
| 1432 |  |  |  |  |  |  | { | 
| 1433 |  |  |  |  |  |  | UV x, q, e, t, z, r, m, b; | 
| 1434 | 1 |  |  |  |  |  | q = p-1; | 
| 1435 | 1 |  |  |  |  |  | e = valuation(q, 2); | 
| 1436 | 1 |  |  |  |  |  | q >>= e; | 
| 1437 | 1 |  |  |  |  |  | t = 3; | 
| 1438 | 1 | 50 |  |  |  |  | while (kronecker_uu(t, p) != -1) { | 
| 1439 | 0 |  |  |  |  |  | t += 2; | 
| 1440 | 0 | 0 |  |  |  |  | if (t == 201) {           /* exit if p looks like a composite */ | 
| 1441 | 0 | 0 |  |  |  |  | if ((p % 2) == 0 || powmod(2, p-1, p) != 1 || powmod(3, p-1, p) != 1) | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1442 | 0 |  |  |  |  |  | return 0; | 
| 1443 | 0 | 0 |  |  |  |  | } else if (t >= 20000) {  /* should never happen */ | 
| 1444 | 0 |  |  |  |  |  | return 0; | 
| 1445 |  |  |  |  |  |  | } | 
| 1446 |  |  |  |  |  |  | } | 
| 1447 | 1 |  |  |  |  |  | t = mont_geta(t, p); | 
| 1448 | 1 |  |  |  |  |  | z = mont_powmod(t, q, p); | 
| 1449 | 1 |  |  |  |  |  | b = mont_powmod(a, q, p); | 
| 1450 | 1 |  |  |  |  |  | r = e; | 
| 1451 | 1 |  |  |  |  |  | q = (q+1) >> 1; | 
| 1452 | 1 |  |  |  |  |  | x = mont_powmod(a, q, p); | 
| 1453 | 3 | 100 |  |  |  |  | while (b != mont1) { | 
| 1454 | 2 |  |  |  |  |  | t = b; | 
| 1455 | 5 | 50 |  |  |  |  | for (m = 0; m < r && t != mont1; m++) | 
|  |  | 100 |  |  |  |  |  | 
| 1456 | 3 | 50 |  |  |  |  | t = mont_sqrmod(t, p); | 
| 1457 | 2 | 50 |  |  |  |  | if (m >= r) break; | 
| 1458 | 2 |  |  |  |  |  | t = mont_powmod(z, UVCONST(1) << (r-m-1), p); | 
| 1459 | 2 | 50 |  |  |  |  | x = mont_mulmod(x, t, p); | 
| 1460 | 2 | 50 |  |  |  |  | z = mont_mulmod(t, t, p); | 
| 1461 | 2 | 50 |  |  |  |  | b = mont_mulmod(b, z, p); | 
| 1462 | 2 |  |  |  |  |  | r = m; | 
| 1463 |  |  |  |  |  |  | } | 
| 1464 | 1 | 50 |  |  |  |  | return mont_recover(x, p); | 
| 1465 |  |  |  |  |  |  | } | 
| 1466 |  |  |  |  |  |  | return 0; | 
| 1467 |  |  |  |  |  |  | } | 
| 1468 |  |  |  |  |  |  | #endif | 
| 1469 |  |  |  |  |  |  |  | 
| 1470 | 10 |  |  |  |  |  | int sqrtmod(UV *s, UV a, UV p) { | 
| 1471 | 10 | 50 |  |  |  |  | if (p == 0) return 0; | 
| 1472 | 10 | 100 |  |  |  |  | if (a >= p) a %= p; | 
| 1473 | 10 | 50 |  |  |  |  | if (p <= 2 || a <= 1) return verify_sqrtmod(a, s,a,p); | 
|  |  | 100 |  |  |  |  |  | 
| 1474 |  |  |  |  |  |  |  | 
| 1475 | 8 |  |  |  |  |  | return verify_sqrtmod(_sqrtmod_prime(a,p), s,a,p); | 
| 1476 |  |  |  |  |  |  | } | 
| 1477 |  |  |  |  |  |  |  | 
| 1478 | 7 |  |  |  |  |  | int sqrtmod_composite(UV *s, UV a, UV n) { | 
| 1479 |  |  |  |  |  |  | UV fac[MPU_MAX_FACTORS+1]; | 
| 1480 |  |  |  |  |  |  | UV exp[MPU_MAX_FACTORS+1]; | 
| 1481 |  |  |  |  |  |  | UV sqr[MPU_MAX_FACTORS+1]; | 
| 1482 |  |  |  |  |  |  | UV p, j, k, gcdan; | 
| 1483 |  |  |  |  |  |  | int i, nfactors; | 
| 1484 |  |  |  |  |  |  |  | 
| 1485 | 7 | 100 |  |  |  |  | if (n == 0) return 0; | 
| 1486 | 5 | 50 |  |  |  |  | if (a >= n) a %= n; | 
| 1487 | 5 | 100 |  |  |  |  | if (n <= 2 || a <= 1) return verify_sqrtmod(a, s,a,n); | 
|  |  | 50 |  |  |  |  |  | 
| 1488 |  |  |  |  |  |  |  | 
| 1489 |  |  |  |  |  |  | /* Simple existence check.  It's still possible no solution exists.*/ | 
| 1490 | 3 | 50 |  |  |  |  | if (kronecker_uu(a, ((n%4) == 2) ? n/2 : n) == -1) return 0; | 
|  |  | 50 |  |  |  |  |  | 
| 1491 |  |  |  |  |  |  |  | 
| 1492 |  |  |  |  |  |  | /* if 8|n 'a' must = 1 mod 8, else if 4|n 'a' must = 1 mod 4 */ | 
| 1493 | 3 | 50 |  |  |  |  | if ((n % 4) == 0) { | 
| 1494 | 0 | 0 |  |  |  |  | if ((n % 8) == 0) { | 
| 1495 | 0 | 0 |  |  |  |  | if ((a % 8) != 1) return 0; | 
| 1496 |  |  |  |  |  |  | } else { | 
| 1497 | 0 | 0 |  |  |  |  | if ((a % 4) != 1) return 0; | 
| 1498 |  |  |  |  |  |  | } | 
| 1499 |  |  |  |  |  |  | } | 
| 1500 |  |  |  |  |  |  |  | 
| 1501 |  |  |  |  |  |  | /* More detailed existence check before factoring.  Still possible. */ | 
| 1502 | 3 |  |  |  |  |  | gcdan = gcd_ui(a, n); | 
| 1503 | 3 | 50 |  |  |  |  | if (gcdan == 1) { | 
| 1504 | 0 | 0 |  |  |  |  | if ((n % 3) == 0 && kronecker_uu(a, 3) != 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1505 | 0 | 0 |  |  |  |  | if ((n % 5) == 0 && kronecker_uu(a, 5) != 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1506 | 0 | 0 |  |  |  |  | if ((n % 7) == 0 && kronecker_uu(a, 7) != 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1507 |  |  |  |  |  |  | } | 
| 1508 |  |  |  |  |  |  |  | 
| 1509 |  |  |  |  |  |  | /* Factor n */ | 
| 1510 | 3 |  |  |  |  |  | nfactors = factor_exp(n, fac, exp); | 
| 1511 |  |  |  |  |  |  |  | 
| 1512 |  |  |  |  |  |  | /* If gcd(a,n)==1, this answers comclusively if a solution exists. */ | 
| 1513 | 3 | 50 |  |  |  |  | if (gcdan == 1) { | 
| 1514 | 0 | 0 |  |  |  |  | for (i = 0; i < nfactors; i++) | 
| 1515 | 0 | 0 |  |  |  |  | if (fac[i] > 7 && kronecker_uu(a, fac[i]) != 1) return 0; | 
|  |  | 0 |  |  |  |  |  | 
| 1516 |  |  |  |  |  |  | } | 
| 1517 |  |  |  |  |  |  |  | 
| 1518 | 11 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 1519 |  |  |  |  |  |  |  | 
| 1520 |  |  |  |  |  |  | /* Powers of 2 */ | 
| 1521 | 8 | 100 |  |  |  |  | if (fac[i] == 2) { | 
| 1522 | 3 | 50 |  |  |  |  | if (exp[i] == 1) { | 
| 1523 | 3 |  |  |  |  |  | sqr[i] = a & 1; | 
| 1524 | 0 | 0 |  |  |  |  | } else if (exp[i] == 2) { | 
| 1525 | 0 |  |  |  |  |  | sqr[i] = 1;  /* and 3 */ | 
| 1526 |  |  |  |  |  |  | } else { | 
| 1527 |  |  |  |  |  |  | UV this_roots[256], next_roots[256]; | 
| 1528 | 0 |  |  |  |  |  | UV nthis = 0, nnext = 0; | 
| 1529 | 0 |  |  |  |  |  | this_roots[nthis++] = 1; | 
| 1530 | 0 |  |  |  |  |  | this_roots[nthis++] = 3; | 
| 1531 | 0 | 0 |  |  |  |  | for (j = 2; j < exp[i]; j++) { | 
| 1532 | 0 |  |  |  |  |  | p = UVCONST(1) << (j+1); | 
| 1533 | 0 |  |  |  |  |  | nnext = 0; | 
| 1534 | 0 | 0 |  |  |  |  | for (k = 0; k < nthis && nnext < 254; k++) { | 
|  |  | 0 |  |  |  |  |  | 
| 1535 | 0 |  |  |  |  |  | UV r = this_roots[k]; | 
| 1536 | 0 | 0 |  |  |  |  | if (sqrmod(r,p) == (a % p)) | 
| 1537 | 0 |  |  |  |  |  | next_roots[nnext++] = r; | 
| 1538 | 0 | 0 |  |  |  |  | if (sqrmod(p-r,p) == (a % p)) | 
| 1539 | 0 |  |  |  |  |  | next_roots[nnext++] = p-r; | 
| 1540 |  |  |  |  |  |  | } | 
| 1541 | 0 | 0 |  |  |  |  | if (nnext == 0) return 0; | 
| 1542 |  |  |  |  |  |  | /* copy next exponent's found roots to this one */ | 
| 1543 | 0 |  |  |  |  |  | nthis = nnext; | 
| 1544 | 0 | 0 |  |  |  |  | for (k = 0; k < nnext; k++) | 
| 1545 | 0 |  |  |  |  |  | this_roots[k] = next_roots[k]; | 
| 1546 |  |  |  |  |  |  | } | 
| 1547 | 0 |  |  |  |  |  | sqr[i] = this_roots[0]; | 
| 1548 |  |  |  |  |  |  | } | 
| 1549 | 3 |  |  |  |  |  | continue; | 
| 1550 |  |  |  |  |  |  | } | 
| 1551 |  |  |  |  |  |  |  | 
| 1552 |  |  |  |  |  |  | /* p is an odd prime */ | 
| 1553 | 5 |  |  |  |  |  | p = fac[i]; | 
| 1554 | 5 | 50 |  |  |  |  | if (!sqrtmod(&(sqr[i]), a, p)) | 
| 1555 | 0 |  |  |  |  |  | return 0; | 
| 1556 |  |  |  |  |  |  |  | 
| 1557 |  |  |  |  |  |  | /* Lift solution of x^2 = a mod p  to  x^2 = a mod p^e */ | 
| 1558 | 5 | 50 |  |  |  |  | for (j = 1; j < exp[i]; j++) { | 
| 1559 |  |  |  |  |  |  | UV xk2, yk, expect, sol; | 
| 1560 | 0 |  |  |  |  |  | xk2 = addmod(sqr[i],sqr[i],p); | 
| 1561 | 0 |  |  |  |  |  | yk = modinverse(xk2, p); | 
| 1562 | 0 |  |  |  |  |  | expect = mulmod(xk2,yk,p); | 
| 1563 | 0 |  |  |  |  |  | p *= fac[i]; | 
| 1564 | 0 |  |  |  |  |  | sol = submod(sqr[i], mulmod(submod(sqrmod(sqr[i],p), a % p, p), yk, p), p); | 
| 1565 | 0 | 0 |  |  |  |  | if (expect != 1 || sqrmod(sol,p) != (a % p)) { | 
|  |  | 0 |  |  |  |  |  | 
| 1566 |  |  |  |  |  |  | /* printf("a %lu failure to lift to %lu^%d\n", a, fac[i], j+1); */ | 
| 1567 | 0 |  |  |  |  |  | return 0; | 
| 1568 |  |  |  |  |  |  | } | 
| 1569 | 0 |  |  |  |  |  | sqr[i] = sol; | 
| 1570 |  |  |  |  |  |  | } | 
| 1571 |  |  |  |  |  |  | } | 
| 1572 |  |  |  |  |  |  |  | 
| 1573 |  |  |  |  |  |  | /* raise fac[i] */ | 
| 1574 | 11 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) | 
| 1575 | 8 |  |  |  |  |  | fac[i] = ipow(fac[i], exp[i]); | 
| 1576 |  |  |  |  |  |  |  | 
| 1577 | 3 |  |  |  |  |  | p = chinese(sqr, fac, nfactors, &i); | 
| 1578 | 7 | 50 |  |  |  |  | return (i == 1) ? verify_sqrtmod(p, s, a, n) : 0; | 
| 1579 |  |  |  |  |  |  | } | 
| 1580 |  |  |  |  |  |  |  | 
| 1581 |  |  |  |  |  |  | /* works only for co-prime inputs and also slower than the algorithm below, | 
| 1582 |  |  |  |  |  |  | * but handles the case where IV_MAX < lcm <= UV_MAX. | 
| 1583 |  |  |  |  |  |  | */ | 
| 1584 | 4 |  |  |  |  |  | static UV _simple_chinese(UV* a, UV* n, UV num, int* status) { | 
| 1585 | 4 |  |  |  |  |  | UV i, lcm = 1, res = 0; | 
| 1586 | 4 |  |  |  |  |  | *status = 0; | 
| 1587 | 4 | 50 |  |  |  |  | if (num == 0) return 0; | 
| 1588 |  |  |  |  |  |  |  | 
| 1589 | 8 | 50 |  |  |  |  | for (i = 0; i < num; i++) { | 
| 1590 | 8 |  |  |  |  |  | UV ni = n[i]; | 
| 1591 | 8 |  |  |  |  |  | UV gcd = gcd_ui(lcm, ni); | 
| 1592 | 8 | 100 |  |  |  |  | if (gcd != 1) return 0;           /* not coprime */ | 
| 1593 | 5 |  |  |  |  |  | ni /= gcd; | 
| 1594 | 5 | 100 |  |  |  |  | if (ni > (UV_MAX/lcm)) return 0;  /* lcm overflow */ | 
| 1595 | 4 |  |  |  |  |  | lcm *= ni; | 
| 1596 |  |  |  |  |  |  | } | 
| 1597 | 0 | 0 |  |  |  |  | for (i = 0; i < num; i++) { | 
| 1598 |  |  |  |  |  |  | UV p, inverse, term; | 
| 1599 | 0 |  |  |  |  |  | p = lcm / n[i]; | 
| 1600 | 0 |  |  |  |  |  | inverse = modinverse(p, n[i]); | 
| 1601 | 0 | 0 |  |  |  |  | if (inverse == 0) return 0;       /* n's coprime so should never happen */ | 
| 1602 | 0 |  |  |  |  |  | term = mulmod(p, mulmod(a[i], inverse, lcm), lcm); | 
| 1603 | 0 |  |  |  |  |  | res = addmod(res, term, lcm); | 
| 1604 |  |  |  |  |  |  | } | 
| 1605 | 0 |  |  |  |  |  | *status = 1; | 
| 1606 | 0 |  |  |  |  |  | return res; | 
| 1607 |  |  |  |  |  |  | } | 
| 1608 |  |  |  |  |  |  |  | 
| 1609 |  |  |  |  |  |  | /* status: 1 ok, -1 no inverse, 0 overflow */ | 
| 1610 | 30 |  |  |  |  |  | UV chinese(UV* a, UV* n, UV num, int* status) { | 
| 1611 |  |  |  |  |  |  | static unsigned short sgaps[] = {7983,3548,1577,701,301,132,57,23,10,4,1,0}; | 
| 1612 |  |  |  |  |  |  | UV gcd, i, j, lcm, sum, gi, gap; | 
| 1613 | 30 |  |  |  |  |  | *status = 1; | 
| 1614 | 30 | 100 |  |  |  |  | if (num == 0) return 0; | 
| 1615 |  |  |  |  |  |  |  | 
| 1616 |  |  |  |  |  |  | /* Sort modulii, largest first */ | 
| 1617 | 348 | 100 |  |  |  |  | for (gi = 0, gap = sgaps[gi]; gap >= 1; gap = sgaps[++gi]) { | 
| 1618 | 361 | 100 |  |  |  |  | for (i = gap; i < num; i++) { | 
| 1619 | 42 |  |  |  |  |  | UV tn = n[i], ta = a[i]; | 
| 1620 | 84 | 100 |  |  |  |  | for (j = i; j >= gap && n[j-gap] < tn; j -= gap) | 
|  |  | 100 |  |  |  |  |  | 
| 1621 | 42 |  |  |  |  |  | {  n[j] = n[j-gap];  a[j] = a[j-gap];  } | 
| 1622 | 42 |  |  |  |  |  | n[j] = tn;  a[j] = ta; | 
| 1623 |  |  |  |  |  |  | } | 
| 1624 |  |  |  |  |  |  | } | 
| 1625 |  |  |  |  |  |  |  | 
| 1626 | 29 | 100 |  |  |  |  | if (n[0] > IV_MAX) return _simple_chinese(a,n,num,status); | 
| 1627 | 28 |  |  |  |  |  | lcm = n[0]; sum = a[0] % n[0]; | 
| 1628 | 58 | 100 |  |  |  |  | for (i = 1; i < num; i++) { | 
| 1629 |  |  |  |  |  |  | IV u, v, t, s; | 
| 1630 |  |  |  |  |  |  | UV vs, ut; | 
| 1631 | 38 |  |  |  |  |  | gcd = gcdext(lcm, n[i], &u, &v, &s, &t); | 
| 1632 | 41 | 100 |  |  |  |  | if (gcd != 1 && ((sum % gcd) != (a[i] % gcd))) { *status = -1; return 0; } | 
|  |  | 100 |  |  |  |  |  | 
| 1633 | 33 | 100 |  |  |  |  | if (s < 0) s = -s; | 
| 1634 | 33 | 100 |  |  |  |  | if (t < 0) t = -t; | 
| 1635 | 33 | 100 |  |  |  |  | if (s > (IV)(IV_MAX/lcm)) return _simple_chinese(a,n,num,status); | 
| 1636 | 30 |  |  |  |  |  | lcm *= s; | 
| 1637 | 30 | 100 |  |  |  |  | if (u < 0) u += lcm; | 
| 1638 | 30 | 100 |  |  |  |  | if (v < 0) v += lcm; | 
| 1639 | 30 |  |  |  |  |  | vs = mulmod((UV)v, (UV)s, lcm); | 
| 1640 | 30 |  |  |  |  |  | ut = mulmod((UV)u, (UV)t, lcm); | 
| 1641 | 30 |  |  |  |  |  | sum = addmod(  mulmod(vs, sum, lcm),  mulmod(ut, a[i], lcm),  lcm  ); | 
| 1642 |  |  |  |  |  |  | } | 
| 1643 | 20 |  |  |  |  |  | return sum; | 
| 1644 |  |  |  |  |  |  | } | 
| 1645 |  |  |  |  |  |  |  | 
| 1646 | 18 |  |  |  |  |  | long double chebyshev_function(UV n, int which) | 
| 1647 |  |  |  |  |  |  | { | 
| 1648 | 18 |  |  |  |  |  | long double logp, logn = logl(n); | 
| 1649 | 18 | 100 |  |  |  |  | UV sqrtn = which ? isqrt(n) : 0;  /* for theta, p <= sqrtn always false */ | 
| 1650 | 18 |  |  |  |  |  | KAHAN_INIT(sum); | 
| 1651 |  |  |  |  |  |  |  | 
| 1652 | 18 | 100 |  |  |  |  | if (n < 500) { | 
| 1653 |  |  |  |  |  |  | UV p, pi; | 
| 1654 | 136 | 100 |  |  |  |  | for (pi = 1;  (p = nth_prime(pi)) <= n; pi++) { | 
| 1655 | 122 |  |  |  |  |  | logp = logl(p); | 
| 1656 | 122 | 100 |  |  |  |  | if (p <= sqrtn) logp *= floorl(logn/logp+1e-15); | 
| 1657 | 122 |  |  |  |  |  | KAHAN_SUM(sum, logp); | 
| 1658 |  |  |  |  |  |  | } | 
| 1659 |  |  |  |  |  |  | } else { | 
| 1660 |  |  |  |  |  |  | UV seg_base, seg_low, seg_high; | 
| 1661 |  |  |  |  |  |  | unsigned char* segment; | 
| 1662 |  |  |  |  |  |  | void* ctx; | 
| 1663 | 4 |  |  |  |  |  | long double logl2 = logl(2); | 
| 1664 | 4 |  |  |  |  |  | long double logl3 = logl(3); | 
| 1665 | 4 |  |  |  |  |  | long double logl5 = logl(5); | 
| 1666 | 4 | 100 |  |  |  |  | if (!which) { | 
| 1667 | 2 |  |  |  |  |  | KAHAN_SUM(sum,logl2); KAHAN_SUM(sum,logl3); KAHAN_SUM(sum,logl5); | 
| 1668 |  |  |  |  |  |  | } else { | 
| 1669 | 2 |  |  |  |  |  | KAHAN_SUM(sum, logl2 * floorl(logn/logl2 + 1e-15)); | 
| 1670 | 2 |  |  |  |  |  | KAHAN_SUM(sum, logl3 * floorl(logn/logl3 + 1e-15)); | 
| 1671 | 2 |  |  |  |  |  | KAHAN_SUM(sum, logl5 * floorl(logn/logl5 + 1e-15)); | 
| 1672 |  |  |  |  |  |  | } | 
| 1673 | 4 |  |  |  |  |  | ctx = start_segment_primes(7, n, &segment); | 
| 1674 | 10 | 100 |  |  |  |  | while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { | 
| 1675 | 225236 | 50 |  |  |  |  | START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 1676 | 213910 |  |  |  |  |  | logp = logl(p); | 
| 1677 | 213910 | 100 |  |  |  |  | if (p <= sqrtn) logp *= floorl(logn/logp+1e-15); | 
| 1678 | 213910 |  |  |  |  |  | KAHAN_SUM(sum, logp); | 
| 1679 | 11320 |  |  |  |  |  | } END_DO_FOR_EACH_SIEVE_PRIME | 
| 1680 |  |  |  |  |  |  | } | 
| 1681 | 4 |  |  |  |  |  | end_segment_primes(ctx); | 
| 1682 |  |  |  |  |  |  | } | 
| 1683 | 18 |  |  |  |  |  | return sum; | 
| 1684 |  |  |  |  |  |  | } | 
| 1685 |  |  |  |  |  |  |  | 
| 1686 |  |  |  |  |  |  |  | 
| 1687 |  |  |  |  |  |  |  | 
| 1688 |  |  |  |  |  |  | /* | 
| 1689 |  |  |  |  |  |  | * See: | 
| 1690 |  |  |  |  |  |  | *  "Multiple-Precision Exponential Integral and Related Functions" | 
| 1691 |  |  |  |  |  |  | *      by David M. Smith | 
| 1692 |  |  |  |  |  |  | *  "On the Evaluation of the Complex-Valued Exponential Integral" | 
| 1693 |  |  |  |  |  |  | *      by Vincent Pegoraro and Philipp Slusallek | 
| 1694 |  |  |  |  |  |  | *  "Numerical Recipes" 3rd edition | 
| 1695 |  |  |  |  |  |  | *      by William H. Press et al. | 
| 1696 |  |  |  |  |  |  | *  "Rational Chevyshev Approximations for the Exponential Integral E_1(x)" | 
| 1697 |  |  |  |  |  |  | *      by W. J. Cody and Henry C. Thacher, Jr. | 
| 1698 |  |  |  |  |  |  | * | 
| 1699 |  |  |  |  |  |  | * Any mistakes here are completely my fault.  This code has not been | 
| 1700 |  |  |  |  |  |  | * verified for anything serious.  For better results, see: | 
| 1701 |  |  |  |  |  |  | *    http://www.trnicely.net/pi/pix_0000.htm | 
| 1702 |  |  |  |  |  |  | * which although the author claims are demonstration programs, will | 
| 1703 |  |  |  |  |  |  | * undoubtedly produce more reliable results than this code does (I don't | 
| 1704 |  |  |  |  |  |  | * know of any obvious issues with this code, but it just hasn't been used | 
| 1705 |  |  |  |  |  |  | * by many people). | 
| 1706 |  |  |  |  |  |  | */ | 
| 1707 |  |  |  |  |  |  |  | 
| 1708 |  |  |  |  |  |  | static long double const euler_mascheroni = 0.57721566490153286060651209008240243104215933593992L; | 
| 1709 |  |  |  |  |  |  | static long double const li2 = 1.045163780117492784844588889194613136522615578151L; | 
| 1710 |  |  |  |  |  |  |  | 
| 1711 | 234 |  |  |  |  |  | long double Ei(long double x) { | 
| 1712 |  |  |  |  |  |  | long double val, term; | 
| 1713 |  |  |  |  |  |  | unsigned int n; | 
| 1714 | 234 |  |  |  |  |  | KAHAN_INIT(sum); | 
| 1715 |  |  |  |  |  |  |  | 
| 1716 | 234 | 50 |  |  |  |  | if (x == 0) croak("Invalid input to ExponentialIntegral:  x must be != 0"); | 
| 1717 |  |  |  |  |  |  | /* Protect against messed up rounding modes */ | 
| 1718 | 234 | 50 |  |  |  |  | if (x >=  12000) return INFINITY; | 
| 1719 | 234 | 50 |  |  |  |  | if (x <= -12000) return 0; | 
| 1720 |  |  |  |  |  |  |  | 
| 1721 | 234 | 100 |  |  |  |  | if (x < -1) { | 
| 1722 |  |  |  |  |  |  | /* Continued fraction, good for x < -1 */ | 
| 1723 | 1 |  |  |  |  |  | long double lc = 0; | 
| 1724 | 1 |  |  |  |  |  | long double ld = 1.0L / (1.0L - (long double)x); | 
| 1725 | 1 |  |  |  |  |  | val = ld * (-expl(x)); | 
| 1726 | 21 | 50 |  |  |  |  | for (n = 1; n <= 100000; n++) { | 
| 1727 |  |  |  |  |  |  | long double old, t, n2; | 
| 1728 | 20 |  |  |  |  |  | t = (long double)(2*n + 1) - (long double) x; | 
| 1729 | 20 |  |  |  |  |  | n2 = n * n; | 
| 1730 | 20 |  |  |  |  |  | lc = 1.0L / (t - n2 * lc); | 
| 1731 | 20 |  |  |  |  |  | ld = 1.0L / (t - n2 * ld); | 
| 1732 | 20 |  |  |  |  |  | old = val; | 
| 1733 | 20 |  |  |  |  |  | val *= ld/lc; | 
| 1734 | 20 | 100 |  |  |  |  | if ( fabsl(val-old) <= LDBL_EPSILON*fabsl(val) ) | 
| 1735 | 1 |  |  |  |  |  | break; | 
| 1736 |  |  |  |  |  |  | } | 
| 1737 | 233 | 100 |  |  |  |  | } else if (x < 0) { | 
| 1738 |  |  |  |  |  |  | /* Rational Chebyshev approximation (Cody, Thacher), good for -1 < x < 0 */ | 
| 1739 |  |  |  |  |  |  | static const long double C6p[7] = { -148151.02102575750838086L, | 
| 1740 |  |  |  |  |  |  | 150260.59476436982420737L, | 
| 1741 |  |  |  |  |  |  | 89904.972007457256553251L, | 
| 1742 |  |  |  |  |  |  | 15924.175980637303639884L, | 
| 1743 |  |  |  |  |  |  | 2150.0672908092918123209L, | 
| 1744 |  |  |  |  |  |  | 116.69552669734461083368L, | 
| 1745 |  |  |  |  |  |  | 5.0196785185439843791020L }; | 
| 1746 |  |  |  |  |  |  | static const long double C6q[7] = {  256664.93484897117319268L, | 
| 1747 |  |  |  |  |  |  | 184340.70063353677359298L, | 
| 1748 |  |  |  |  |  |  | 52440.529172056355429883L, | 
| 1749 |  |  |  |  |  |  | 8125.8035174768735759866L, | 
| 1750 |  |  |  |  |  |  | 750.43163907103936624165L, | 
| 1751 |  |  |  |  |  |  | 40.205465640027706061433L, | 
| 1752 |  |  |  |  |  |  | 1.0000000000000000000000L }; | 
| 1753 | 5 |  |  |  |  |  | long double sumn = C6p[0]-x*(C6p[1]-x*(C6p[2]-x*(C6p[3]-x*(C6p[4]-x*(C6p[5]-x*C6p[6]))))); | 
| 1754 | 5 |  |  |  |  |  | long double sumd = C6q[0]-x*(C6q[1]-x*(C6q[2]-x*(C6q[3]-x*(C6q[4]-x*(C6q[5]-x*C6q[6]))))); | 
| 1755 | 5 |  |  |  |  |  | val = logl(-x) - sumn/sumd; | 
| 1756 | 228 | 50 |  |  |  |  | } else if (x < (-2 * logl(LDBL_EPSILON))) { | 
| 1757 |  |  |  |  |  |  | /* Convergent series.  Accurate but slow especially with large x. */ | 
| 1758 | 228 |  |  |  |  |  | long double fact_n = x; | 
| 1759 | 15320 | 50 |  |  |  |  | for (n = 2; n <= 200; n++) { | 
| 1760 | 15320 |  |  |  |  |  | long double invn = 1.0L / n; | 
| 1761 | 15320 |  |  |  |  |  | fact_n *= (long double)x * invn; | 
| 1762 | 15320 |  |  |  |  |  | term = fact_n * invn; | 
| 1763 | 15320 |  |  |  |  |  | KAHAN_SUM(sum, term); | 
| 1764 |  |  |  |  |  |  | /* printf("C  after adding %.20Lf, val = %.20Lf\n", term, sum); */ | 
| 1765 | 15320 | 100 |  |  |  |  | if (term < LDBL_EPSILON*sum) break; | 
| 1766 |  |  |  |  |  |  | } | 
| 1767 | 228 |  |  |  |  |  | KAHAN_SUM(sum, euler_mascheroni); | 
| 1768 | 228 |  |  |  |  |  | KAHAN_SUM(sum, logl(x)); | 
| 1769 | 228 |  |  |  |  |  | KAHAN_SUM(sum, x); | 
| 1770 | 228 |  |  |  |  |  | val = sum; | 
| 1771 | 0 | 0 |  |  |  |  | } else if (x >= 24) { | 
| 1772 |  |  |  |  |  |  | /* Cody / Thacher rational Chebyshev */ | 
| 1773 |  |  |  |  |  |  | static const long double P2[10] = { | 
| 1774 |  |  |  |  |  |  | 1.75338801265465972390E02L,-2.23127670777632409550E02L, | 
| 1775 |  |  |  |  |  |  | -1.81949664929868906455E01L,-2.79798528624305389340E01L, | 
| 1776 |  |  |  |  |  |  | -7.63147701620253630855E00L,-1.52856623636929636839E01L, | 
| 1777 |  |  |  |  |  |  | -7.06810977895029358836E00L,-5.00006640413131002475E00L, | 
| 1778 |  |  |  |  |  |  | -3.00000000320981265753E00L, 1.00000000000000485503E00L }; | 
| 1779 |  |  |  |  |  |  | static const long double Q2[9] = { | 
| 1780 |  |  |  |  |  |  | 3.97845977167414720840E04L, 3.97277109100414518365E00L, | 
| 1781 |  |  |  |  |  |  | 1.37790390235747998793E02L, 1.17179220502086455287E02L, | 
| 1782 |  |  |  |  |  |  | 7.04831847180424675988E01L,-1.20187763547154743238E01L, | 
| 1783 |  |  |  |  |  |  | -7.99243595776339741065E00L,-2.99999894040324959612E00L, | 
| 1784 |  |  |  |  |  |  | 1.99999999999048104167E00L }; | 
| 1785 | 0 |  |  |  |  |  | long double invx = 1.0L / x; | 
| 1786 | 0 |  |  |  |  |  | long double frac = 0.0; | 
| 1787 | 0 | 0 |  |  |  |  | for (n = 0; n <= 8; n++) | 
| 1788 | 0 |  |  |  |  |  | frac = Q2[n] / (P2[n] + x + frac); | 
| 1789 | 0 |  |  |  |  |  | frac += P2[9]; | 
| 1790 | 0 |  |  |  |  |  | val = expl(x) * (invx + invx*invx*frac); | 
| 1791 |  |  |  |  |  |  | } else { | 
| 1792 |  |  |  |  |  |  | /* Asymptotic divergent series */ | 
| 1793 | 0 |  |  |  |  |  | long double invx = 1.0L / x; | 
| 1794 | 0 |  |  |  |  |  | term = 1.0; | 
| 1795 | 0 | 0 |  |  |  |  | for (n = 1; n <= 200; n++) { | 
| 1796 | 0 |  |  |  |  |  | long double last_term = term; | 
| 1797 | 0 |  |  |  |  |  | term = term * ( (long double)n * invx ); | 
| 1798 | 0 | 0 |  |  |  |  | if (term < LDBL_EPSILON*sum) break; | 
| 1799 | 0 | 0 |  |  |  |  | if (term < last_term) { | 
| 1800 | 0 |  |  |  |  |  | KAHAN_SUM(sum, term); | 
| 1801 |  |  |  |  |  |  | /* printf("A  after adding %.20llf, sum = %.20llf\n", term, sum); */ | 
| 1802 |  |  |  |  |  |  | } else { | 
| 1803 | 0 |  |  |  |  |  | KAHAN_SUM(sum, (-last_term/3) ); | 
| 1804 |  |  |  |  |  |  | /* printf("A  after adding %.20llf, sum = %.20llf\n", -last_term/3, sum); */ | 
| 1805 | 0 |  |  |  |  |  | break; | 
| 1806 |  |  |  |  |  |  | } | 
| 1807 |  |  |  |  |  |  | } | 
| 1808 | 0 |  |  |  |  |  | KAHAN_SUM(sum, 1.0L); | 
| 1809 | 0 |  |  |  |  |  | val = expl(x) * sum * invx; | 
| 1810 |  |  |  |  |  |  | } | 
| 1811 |  |  |  |  |  |  |  | 
| 1812 | 234 |  |  |  |  |  | return val; | 
| 1813 |  |  |  |  |  |  | } | 
| 1814 |  |  |  |  |  |  |  | 
| 1815 | 2213 |  |  |  |  |  | long double Li(long double x) { | 
| 1816 | 2213 | 50 |  |  |  |  | if (x == 0) return 0; | 
| 1817 | 2213 | 50 |  |  |  |  | if (x == 1) return -INFINITY; | 
| 1818 | 2213 | 50 |  |  |  |  | if (x == 2) return li2; | 
| 1819 | 2213 | 50 |  |  |  |  | if (x < 0) croak("Invalid input to LogarithmicIntegral:  x must be >= 0"); | 
| 1820 | 2213 | 50 |  |  |  |  | if (x >= LDBL_MAX) return INFINITY; | 
| 1821 |  |  |  |  |  |  |  | 
| 1822 |  |  |  |  |  |  | /* Calculate directly using Ramanujan's series. */ | 
| 1823 | 2213 | 50 |  |  |  |  | if (x > 1) { | 
| 1824 | 2213 |  |  |  |  |  | const long double logx = logl(x); | 
| 1825 | 2213 |  |  |  |  |  | long double sum = 0, inner_sum = 0, old_sum, factorial = 1, power2 = 1; | 
| 1826 | 2213 |  |  |  |  |  | long double q, p = -1; | 
| 1827 | 2213 |  |  |  |  |  | int k = 0, n = 0; | 
| 1828 |  |  |  |  |  |  |  | 
| 1829 | 89173 | 50 |  |  |  |  | for (n = 1, k = 0; n < 200; n++) { | 
| 1830 | 89173 |  |  |  |  |  | factorial *= n; | 
| 1831 | 89173 |  |  |  |  |  | p *= -logx; | 
| 1832 | 89173 |  |  |  |  |  | q = factorial * power2; | 
| 1833 | 89173 |  |  |  |  |  | power2 *= 2; | 
| 1834 | 134407 | 100 |  |  |  |  | for (; k <= (n - 1) / 2; k++) | 
| 1835 | 45234 |  |  |  |  |  | inner_sum += 1.0L / (2 * k + 1); | 
| 1836 | 89173 |  |  |  |  |  | old_sum = sum; | 
| 1837 | 89173 |  |  |  |  |  | sum += (p / q) * inner_sum; | 
| 1838 | 89173 | 100 |  |  |  |  | if (fabsl(sum - old_sum) <= LDBL_EPSILON) break; | 
| 1839 |  |  |  |  |  |  | } | 
| 1840 | 2213 |  |  |  |  |  | return euler_mascheroni + logl(logx) + sqrtl(x) * sum; | 
| 1841 |  |  |  |  |  |  | } | 
| 1842 |  |  |  |  |  |  |  | 
| 1843 | 0 |  |  |  |  |  | return Ei(logl(x)); | 
| 1844 |  |  |  |  |  |  | } | 
| 1845 |  |  |  |  |  |  |  | 
| 1846 | 97 |  |  |  |  |  | UV inverse_li(UV x) { | 
| 1847 |  |  |  |  |  |  | UV r; | 
| 1848 |  |  |  |  |  |  | int i; | 
| 1849 | 97 |  |  |  |  |  | long double t, lx = (long double)x, term, old_term = 0; | 
| 1850 | 97 | 100 |  |  |  |  | if (x <= 2) return x + (x > 0); | 
| 1851 |  |  |  |  |  |  | /* Iterate Halley's method until error grows. */ | 
| 1852 | 431 | 100 |  |  |  |  | for (i = 0, t = lx*logl(x); i < 4; i++) { | 
| 1853 | 376 |  |  |  |  |  | long double dn = Li(t) - lx; | 
| 1854 | 376 |  |  |  |  |  | term = dn*logl(t) / (1.0L + dn/(2*t)); | 
| 1855 | 376 | 100 |  |  |  |  | if (i > 0 && fabsl(term) >= fabsl(old_term)) { t -= term/4; break; } | 
|  |  | 100 |  |  |  |  |  | 
| 1856 | 337 |  |  |  |  |  | old_term = term; | 
| 1857 | 337 |  |  |  |  |  | t -= term; | 
| 1858 |  |  |  |  |  |  | } | 
| 1859 | 94 |  |  |  |  |  | r = (UV)ceill(t); | 
| 1860 |  |  |  |  |  |  |  | 
| 1861 |  |  |  |  |  |  | /* Meet our more stringent goal of an exact answer. */ | 
| 1862 | 94 | 50 |  |  |  |  | i = (x > 4e16) ? 2048 : 128; | 
| 1863 | 94 | 50 |  |  |  |  | if (Li(r-1) >= lx) { | 
| 1864 | 0 | 0 |  |  |  |  | while (Li(r-i) >= lx) r -= i; | 
| 1865 | 0 | 0 |  |  |  |  | for (i = i/2; i > 0; i /= 2) | 
| 1866 | 0 | 0 |  |  |  |  | if (Li(r-i) >= lx) r -= i; | 
| 1867 |  |  |  |  |  |  | } else { | 
| 1868 | 94 | 50 |  |  |  |  | while (Li(r+i-1) < lx) r += i; | 
| 1869 | 752 | 100 |  |  |  |  | for (i = i/2; i > 0; i /= 2) | 
| 1870 | 658 | 50 |  |  |  |  | if (Li(r+i-1) < lx) r += i; | 
| 1871 |  |  |  |  |  |  | } | 
| 1872 | 94 |  |  |  |  |  | return r; | 
| 1873 |  |  |  |  |  |  | } | 
| 1874 |  |  |  |  |  |  |  | 
| 1875 | 23 |  |  |  |  |  | UV inverse_R(UV x) { | 
| 1876 |  |  |  |  |  |  | int i; | 
| 1877 | 23 |  |  |  |  |  | long double t, dn, lx = (long double) x, term, old_term = 0; | 
| 1878 | 23 | 50 |  |  |  |  | if (x <= 2) return x + (x > 0); | 
| 1879 |  |  |  |  |  |  |  | 
| 1880 |  |  |  |  |  |  | /* Rough estimate */ | 
| 1881 | 23 |  |  |  |  |  | t = lx * logl(x); | 
| 1882 |  |  |  |  |  |  | /* Improve: approx inverse li with one round of Halley */ | 
| 1883 | 23 |  |  |  |  |  | dn = Li(t) - lx; | 
| 1884 | 23 |  |  |  |  |  | t = t - dn * logl(t) / (1.0L + dn/(2*t)); | 
| 1885 |  |  |  |  |  |  | /* Iterate 1-4 rounds of Halley */ | 
| 1886 | 102 | 100 |  |  |  |  | for (i = 0; i < 4; i++) { | 
| 1887 | 89 |  |  |  |  |  | dn = RiemannR(t) - lx; | 
| 1888 | 89 |  |  |  |  |  | term = dn * logl(t) / (1.0L + dn/(2*t)); | 
| 1889 | 89 | 100 |  |  |  |  | if (i > 0 && fabsl(term) >= fabsl(old_term)) { t -= term/4; break; } | 
|  |  | 100 |  |  |  |  |  | 
| 1890 | 79 |  |  |  |  |  | old_term = term; | 
| 1891 | 79 |  |  |  |  |  | t -= term; | 
| 1892 |  |  |  |  |  |  | } | 
| 1893 | 23 |  |  |  |  |  | return (UV)ceill(t); | 
| 1894 |  |  |  |  |  |  | } | 
| 1895 |  |  |  |  |  |  |  | 
| 1896 |  |  |  |  |  |  |  | 
| 1897 |  |  |  |  |  |  | /* | 
| 1898 |  |  |  |  |  |  | * Storing the first 10-20 Zeta values makes sense.  Past that it is purely | 
| 1899 |  |  |  |  |  |  | * to avoid making the call to generate them ourselves.  We could cache the | 
| 1900 |  |  |  |  |  |  | * calculated values. These all have 1 subtracted from them.  */ | 
| 1901 |  |  |  |  |  |  | static const long double riemann_zeta_table[] = { | 
| 1902 |  |  |  |  |  |  | 0.6449340668482264364724151666460251892L,  /* zeta(2) */ | 
| 1903 |  |  |  |  |  |  | 0.2020569031595942853997381615114499908L, | 
| 1904 |  |  |  |  |  |  | 0.0823232337111381915160036965411679028L, | 
| 1905 |  |  |  |  |  |  | 0.0369277551433699263313654864570341681L, | 
| 1906 |  |  |  |  |  |  | 0.0173430619844491397145179297909205279L, | 
| 1907 |  |  |  |  |  |  | 0.0083492773819228268397975498497967596L, | 
| 1908 |  |  |  |  |  |  | 0.0040773561979443393786852385086524653L, | 
| 1909 |  |  |  |  |  |  | 0.0020083928260822144178527692324120605L, | 
| 1910 |  |  |  |  |  |  | 0.0009945751278180853371459589003190170L, | 
| 1911 |  |  |  |  |  |  | 0.0004941886041194645587022825264699365L, | 
| 1912 |  |  |  |  |  |  | 0.0002460865533080482986379980477396710L, | 
| 1913 |  |  |  |  |  |  | 0.0001227133475784891467518365263573957L, | 
| 1914 |  |  |  |  |  |  | 0.0000612481350587048292585451051353337L, | 
| 1915 |  |  |  |  |  |  | 0.0000305882363070204935517285106450626L, | 
| 1916 |  |  |  |  |  |  | 0.0000152822594086518717325714876367220L, | 
| 1917 |  |  |  |  |  |  | 0.0000076371976378997622736002935630292L,  /* zeta(17)  Past here all we're */ | 
| 1918 |  |  |  |  |  |  | 0.0000038172932649998398564616446219397L,  /* zeta(18)  getting is speed.   */ | 
| 1919 |  |  |  |  |  |  | 0.0000019082127165539389256569577951013L, | 
| 1920 |  |  |  |  |  |  | 0.0000009539620338727961131520386834493L, | 
| 1921 |  |  |  |  |  |  | 0.0000004769329867878064631167196043730L, | 
| 1922 |  |  |  |  |  |  | 0.0000002384505027277329900036481867530L, | 
| 1923 |  |  |  |  |  |  | 0.0000001192199259653110730677887188823L, | 
| 1924 |  |  |  |  |  |  | 0.0000000596081890512594796124402079358L, | 
| 1925 |  |  |  |  |  |  | 0.0000000298035035146522801860637050694L, | 
| 1926 |  |  |  |  |  |  | 0.0000000149015548283650412346585066307L, | 
| 1927 |  |  |  |  |  |  | 0.0000000074507117898354294919810041706L, | 
| 1928 |  |  |  |  |  |  | 0.0000000037253340247884570548192040184L, | 
| 1929 |  |  |  |  |  |  | 0.0000000018626597235130490064039099454L, | 
| 1930 |  |  |  |  |  |  | 0.0000000009313274324196681828717647350L, | 
| 1931 |  |  |  |  |  |  | 0.0000000004656629065033784072989233251L, | 
| 1932 |  |  |  |  |  |  | 0.0000000002328311833676505492001455976L, | 
| 1933 |  |  |  |  |  |  | 0.0000000001164155017270051977592973835L, | 
| 1934 |  |  |  |  |  |  | 0.0000000000582077208790270088924368599L, | 
| 1935 |  |  |  |  |  |  | 0.0000000000291038504449709968692942523L, | 
| 1936 |  |  |  |  |  |  | 0.0000000000145519218910419842359296322L, | 
| 1937 |  |  |  |  |  |  | 0.0000000000072759598350574810145208690L, | 
| 1938 |  |  |  |  |  |  | 0.0000000000036379795473786511902372363L, | 
| 1939 |  |  |  |  |  |  | 0.0000000000018189896503070659475848321L, | 
| 1940 |  |  |  |  |  |  | 0.0000000000009094947840263889282533118L, | 
| 1941 |  |  |  |  |  |  | 0.0000000000004547473783042154026799112L, | 
| 1942 |  |  |  |  |  |  | 0.0000000000002273736845824652515226821L, | 
| 1943 |  |  |  |  |  |  | 0.0000000000001136868407680227849349105L, | 
| 1944 |  |  |  |  |  |  | 0.0000000000000568434198762758560927718L, | 
| 1945 |  |  |  |  |  |  | 0.0000000000000284217097688930185545507L, | 
| 1946 |  |  |  |  |  |  | 0.0000000000000142108548280316067698343L, | 
| 1947 |  |  |  |  |  |  | 0.00000000000000710542739521085271287735L, | 
| 1948 |  |  |  |  |  |  | 0.00000000000000355271369133711367329847L, | 
| 1949 |  |  |  |  |  |  | 0.00000000000000177635684357912032747335L, | 
| 1950 |  |  |  |  |  |  | 0.000000000000000888178421093081590309609L, | 
| 1951 |  |  |  |  |  |  | 0.000000000000000444089210314381336419777L, | 
| 1952 |  |  |  |  |  |  | 0.000000000000000222044605079804198399932L, | 
| 1953 |  |  |  |  |  |  | 0.000000000000000111022302514106613372055L, | 
| 1954 |  |  |  |  |  |  | 0.0000000000000000555111512484548124372374L, | 
| 1955 |  |  |  |  |  |  | 0.0000000000000000277555756213612417258163L, | 
| 1956 |  |  |  |  |  |  | 0.0000000000000000138777878097252327628391L, | 
| 1957 |  |  |  |  |  |  | }; | 
| 1958 |  |  |  |  |  |  | #define NPRECALC_ZETA (sizeof(riemann_zeta_table)/sizeof(riemann_zeta_table[0])) | 
| 1959 |  |  |  |  |  |  |  | 
| 1960 |  |  |  |  |  |  | /* Riemann Zeta on the real line, with 1 subtracted. | 
| 1961 |  |  |  |  |  |  | * Compare to Math::Cephes zetac.  Also zeta with q=1 and subtracting 1. | 
| 1962 |  |  |  |  |  |  | * | 
| 1963 |  |  |  |  |  |  | * The Cephes zeta function uses a series (2k)!/B_2k which converges rapidly | 
| 1964 |  |  |  |  |  |  | * and has a very wide range of values.  We use it here for some values. | 
| 1965 |  |  |  |  |  |  | * | 
| 1966 |  |  |  |  |  |  | * Note: Calculations here are done on long doubles and we try to generate as | 
| 1967 |  |  |  |  |  |  | *       much accuracy as possible.  They will get returned to Perl as an NV, | 
| 1968 |  |  |  |  |  |  | *       which is typically a 64-bit double with 15 digits. | 
| 1969 |  |  |  |  |  |  | * | 
| 1970 |  |  |  |  |  |  | * For values 0.5 to 5, this code uses the rational Chebyshev approximation | 
| 1971 |  |  |  |  |  |  | * from Cody and Thacher.  This method is extraordinarily fast and very | 
| 1972 |  |  |  |  |  |  | * accurate over its range (slightly better than Cephes for most values).  If | 
| 1973 |  |  |  |  |  |  | * we had quad floats, we could use the 9-term polynomial. | 
| 1974 |  |  |  |  |  |  | */ | 
| 1975 | 2895 |  |  |  |  |  | long double ld_riemann_zeta(long double x) { | 
| 1976 |  |  |  |  |  |  | int i; | 
| 1977 |  |  |  |  |  |  |  | 
| 1978 | 2895 | 50 |  |  |  |  | if (x < 0)  croak("Invalid input to RiemannZeta:  x must be >= 0"); | 
| 1979 | 2895 | 50 |  |  |  |  | if (x == 1) return INFINITY; | 
| 1980 |  |  |  |  |  |  |  | 
| 1981 | 2895 | 100 |  |  |  |  | if (x == (unsigned int)x) { | 
| 1982 | 2891 |  |  |  |  |  | int k = x - 2; | 
| 1983 | 2891 | 50 |  |  |  |  | if ((k >= 0) && (k < (int)NPRECALC_ZETA)) | 
|  |  | 100 |  |  |  |  |  | 
| 1984 | 2 |  |  |  |  |  | return riemann_zeta_table[k]; | 
| 1985 |  |  |  |  |  |  | } | 
| 1986 |  |  |  |  |  |  |  | 
| 1987 |  |  |  |  |  |  | /* Cody / Thacher rational Chebyshev approximation for small values */ | 
| 1988 | 2893 | 50 |  |  |  |  | if (x >= 0.5 && x <= 5.0) { | 
|  |  | 100 |  |  |  |  |  | 
| 1989 |  |  |  |  |  |  | static const long double C8p[9] = { 1.287168121482446392809e10L, | 
| 1990 |  |  |  |  |  |  | 1.375396932037025111825e10L, | 
| 1991 |  |  |  |  |  |  | 5.106655918364406103683e09L, | 
| 1992 |  |  |  |  |  |  | 8.561471002433314862469e08L, | 
| 1993 |  |  |  |  |  |  | 7.483618124380232984824e07L, | 
| 1994 |  |  |  |  |  |  | 4.860106585461882511535e06L, | 
| 1995 |  |  |  |  |  |  | 2.739574990221406087728e05L, | 
| 1996 |  |  |  |  |  |  | 4.631710843183427123061e03L, | 
| 1997 |  |  |  |  |  |  | 5.787581004096660659109e01L }; | 
| 1998 |  |  |  |  |  |  | static const long double C8q[9] = { 2.574336242964846244667e10L, | 
| 1999 |  |  |  |  |  |  | 5.938165648679590160003e09L, | 
| 2000 |  |  |  |  |  |  | 9.006330373261233439089e08L, | 
| 2001 |  |  |  |  |  |  | 8.042536634283289888587e07L, | 
| 2002 |  |  |  |  |  |  | 5.609711759541920062814e06L, | 
| 2003 |  |  |  |  |  |  | 2.247431202899137523543e05L, | 
| 2004 |  |  |  |  |  |  | 7.574578909341537560115e03L, | 
| 2005 |  |  |  |  |  |  | -2.373835781373772623086e01L, | 
| 2006 |  |  |  |  |  |  | 1.000000000000000000000L    }; | 
| 2007 | 2 |  |  |  |  |  | long double sumn = C8p[0]+x*(C8p[1]+x*(C8p[2]+x*(C8p[3]+x*(C8p[4]+x*(C8p[5]+x*(C8p[6]+x*(C8p[7]+x*C8p[8]))))))); | 
| 2008 | 2 |  |  |  |  |  | long double sumd = C8q[0]+x*(C8q[1]+x*(C8q[2]+x*(C8q[3]+x*(C8q[4]+x*(C8q[5]+x*(C8q[6]+x*(C8q[7]+x*C8q[8]))))))); | 
| 2009 | 2 |  |  |  |  |  | long double sum = (sumn - (x-1)*sumd) / ((x-1)*sumd); | 
| 2010 | 2 |  |  |  |  |  | return sum; | 
| 2011 |  |  |  |  |  |  | } | 
| 2012 |  |  |  |  |  |  |  | 
| 2013 | 2891 | 50 |  |  |  |  | if (x > 17000.0) | 
| 2014 | 0 |  |  |  |  |  | return 0.0; | 
| 2015 |  |  |  |  |  |  |  | 
| 2016 |  |  |  |  |  |  | #if 0 | 
| 2017 |  |  |  |  |  |  | { | 
| 2018 |  |  |  |  |  |  | KAHAN_INIT(sum); | 
| 2019 |  |  |  |  |  |  | /* Simple defining series, works well. */ | 
| 2020 |  |  |  |  |  |  | for (i = 5; i <= 1000000; i++) { | 
| 2021 |  |  |  |  |  |  | long double term = powl(i, -x); | 
| 2022 |  |  |  |  |  |  | KAHAN_SUM(sum, term); | 
| 2023 |  |  |  |  |  |  | if (term < LDBL_EPSILON*sum) break; | 
| 2024 |  |  |  |  |  |  | } | 
| 2025 |  |  |  |  |  |  | KAHAN_SUM(sum, powl(4, -x) ); | 
| 2026 |  |  |  |  |  |  | KAHAN_SUM(sum, powl(3, -x) ); | 
| 2027 |  |  |  |  |  |  | KAHAN_SUM(sum, powl(2, -x) ); | 
| 2028 |  |  |  |  |  |  | return sum; | 
| 2029 |  |  |  |  |  |  | } | 
| 2030 |  |  |  |  |  |  | #endif | 
| 2031 |  |  |  |  |  |  |  | 
| 2032 |  |  |  |  |  |  | /* The 2n!/B_2k series used by the Cephes library. */ | 
| 2033 |  |  |  |  |  |  | { | 
| 2034 |  |  |  |  |  |  | /* gp/pari: | 
| 2035 |  |  |  |  |  |  | *   for(i=1,13,printf("%.38g\n",(2*i)!/bernreal(2*i))) | 
| 2036 |  |  |  |  |  |  | * MPU: | 
| 2037 |  |  |  |  |  |  | *   use bignum; | 
| 2038 |  |  |  |  |  |  | *   say +(factorial(2*$_)/bernreal(2*$_))->bround(38) for 1..13; | 
| 2039 |  |  |  |  |  |  | */ | 
| 2040 |  |  |  |  |  |  | static const long double A[] = { | 
| 2041 |  |  |  |  |  |  | 12.0L, | 
| 2042 |  |  |  |  |  |  | -720.0L, | 
| 2043 |  |  |  |  |  |  | 30240.0L, | 
| 2044 |  |  |  |  |  |  | -1209600.0L, | 
| 2045 |  |  |  |  |  |  | 47900160.0L, | 
| 2046 |  |  |  |  |  |  | -1892437580.3183791606367583212735166425L, | 
| 2047 |  |  |  |  |  |  | 74724249600.0L, | 
| 2048 |  |  |  |  |  |  | -2950130727918.1642244954382084600497650L, | 
| 2049 |  |  |  |  |  |  | 116467828143500.67248729113000661089201L, | 
| 2050 |  |  |  |  |  |  | -4597978722407472.6105457273596737891656L, | 
| 2051 |  |  |  |  |  |  | 181521054019435467.73425331153534235290L, | 
| 2052 |  |  |  |  |  |  | -7166165256175667011.3346447367083352775L, | 
| 2053 |  |  |  |  |  |  | 282908877253042996618.18640556532523927L, | 
| 2054 |  |  |  |  |  |  | }; | 
| 2055 |  |  |  |  |  |  | long double a, b, s, t; | 
| 2056 | 2891 |  |  |  |  |  | const long double w = 10.0; | 
| 2057 | 2891 |  |  |  |  |  | s = 0.0; | 
| 2058 | 2891 |  |  |  |  |  | b = 0.0; | 
| 2059 | 9246 | 100 |  |  |  |  | for (i = 2; i < 11; i++) { | 
| 2060 | 9244 |  |  |  |  |  | b = powl( i, -x ); | 
| 2061 | 9244 |  |  |  |  |  | s += b; | 
| 2062 | 9244 | 100 |  |  |  |  | if (fabsl(b) < fabsl(LDBL_EPSILON * s)) | 
| 2063 | 2889 |  |  |  |  |  | return s; | 
| 2064 |  |  |  |  |  |  | } | 
| 2065 | 2 |  |  |  |  |  | s = s + b*w/(x-1.0) - 0.5 * b; | 
| 2066 | 2 |  |  |  |  |  | a = 1.0; | 
| 2067 | 19 | 50 |  |  |  |  | for (i = 0; i < 13; i++) { | 
| 2068 | 19 |  |  |  |  |  | long double k = 2*i; | 
| 2069 | 19 |  |  |  |  |  | a *= x + k; | 
| 2070 | 19 |  |  |  |  |  | b /= w; | 
| 2071 | 19 |  |  |  |  |  | t = a*b/A[i]; | 
| 2072 | 19 |  |  |  |  |  | s = s + t; | 
| 2073 | 19 | 100 |  |  |  |  | if (fabsl(t) < fabsl(LDBL_EPSILON * s)) | 
| 2074 | 2 |  |  |  |  |  | break; | 
| 2075 | 17 |  |  |  |  |  | a *= x + k + 1.0; | 
| 2076 | 17 |  |  |  |  |  | b /= w; | 
| 2077 |  |  |  |  |  |  | } | 
| 2078 | 2 |  |  |  |  |  | return s; | 
| 2079 |  |  |  |  |  |  | } | 
| 2080 |  |  |  |  |  |  | } | 
| 2081 |  |  |  |  |  |  |  | 
| 2082 | 123 |  |  |  |  |  | long double RiemannR(long double x) { | 
| 2083 |  |  |  |  |  |  | long double part_term, term, flogx, ki, old_sum; | 
| 2084 |  |  |  |  |  |  | unsigned int k; | 
| 2085 | 123 |  |  |  |  |  | KAHAN_INIT(sum); | 
| 2086 |  |  |  |  |  |  |  | 
| 2087 | 123 | 50 |  |  |  |  | if (x <= 0) croak("Invalid input to ReimannR:  x must be > 0"); | 
| 2088 |  |  |  |  |  |  |  | 
| 2089 | 123 | 100 |  |  |  |  | if (x > 1e19) { | 
| 2090 | 2 |  |  |  |  |  | const signed char* amob = _moebius_range(0, 100); | 
| 2091 | 2 |  |  |  |  |  | KAHAN_SUM(sum, Li(x)); | 
| 2092 | 132 | 50 |  |  |  |  | for (k = 2; k <= 100; k++) { | 
| 2093 | 132 | 100 |  |  |  |  | if (amob[k] == 0) continue; | 
| 2094 | 82 |  |  |  |  |  | ki = 1.0L / (long double) k; | 
| 2095 | 82 |  |  |  |  |  | part_term = powl(x,ki); | 
| 2096 | 82 | 50 |  |  |  |  | if (part_term > LDBL_MAX) return INFINITY; | 
| 2097 | 82 |  |  |  |  |  | term = amob[k] * ki * Li(part_term); | 
| 2098 | 82 |  |  |  |  |  | old_sum = sum; | 
| 2099 | 82 |  |  |  |  |  | KAHAN_SUM(sum, term); | 
| 2100 | 82 | 100 |  |  |  |  | if (fabsl(sum - old_sum) <= LDBL_EPSILON) break; | 
| 2101 |  |  |  |  |  |  | } | 
| 2102 | 2 |  |  |  |  |  | Safefree(amob); | 
| 2103 | 2 |  |  |  |  |  | return sum; | 
| 2104 |  |  |  |  |  |  | } | 
| 2105 |  |  |  |  |  |  |  | 
| 2106 | 121 |  |  |  |  |  | KAHAN_SUM(sum, 1.0); | 
| 2107 | 121 |  |  |  |  |  | flogx = logl(x); | 
| 2108 | 121 |  |  |  |  |  | part_term = 1; | 
| 2109 |  |  |  |  |  |  |  | 
| 2110 | 9329 | 50 |  |  |  |  | for (k = 1; k <= 10000; k++) { | 
| 2111 | 9329 | 100 |  |  |  |  | ki = (k-1 < NPRECALC_ZETA) ? riemann_zeta_table[k-1] : ld_riemann_zeta(k+1); | 
| 2112 | 9329 |  |  |  |  |  | part_term *= flogx / k; | 
| 2113 | 9329 |  |  |  |  |  | term = part_term / (k + k * ki); | 
| 2114 | 9329 |  |  |  |  |  | old_sum = sum; | 
| 2115 | 9329 |  |  |  |  |  | KAHAN_SUM(sum, term); | 
| 2116 |  |  |  |  |  |  | /* printf("R %5d after adding %.18Lg, sum = %.19Lg (%Lg)\n", k, term, sum, fabsl(sum-old_sum)); */ | 
| 2117 | 9329 | 100 |  |  |  |  | if (fabsl(sum - old_sum) <= LDBL_EPSILON) break; | 
| 2118 |  |  |  |  |  |  | } | 
| 2119 |  |  |  |  |  |  |  | 
| 2120 | 121 |  |  |  |  |  | return sum; | 
| 2121 |  |  |  |  |  |  | } | 
| 2122 |  |  |  |  |  |  |  | 
| 2123 | 8 |  |  |  |  |  | static long double _lambertw_approx(long double x) { | 
| 2124 |  |  |  |  |  |  | /* See Veberic 2009 for other approximations */ | 
| 2125 | 8 | 100 |  |  |  |  | if (x < -0.060) {  /* Pade(3,2) */ | 
| 2126 | 2 |  |  |  |  |  | long double ti = 5.4365636569180904707205749L * x + 2.0L; | 
| 2127 | 2 | 50 |  |  |  |  | long double t  = (ti <= 0.0L) ? 0.0L : sqrtl(ti); | 
| 2128 | 2 |  |  |  |  |  | long double t2 = t*t; | 
| 2129 | 2 |  |  |  |  |  | long double t3 = t*t2; | 
| 2130 | 2 |  |  |  |  |  | return (-1.0L + (1.0L/6.0L)*t + (257.0L/720.0L)*t2 + (13.0L/720.0L)*t3) / (1.0L + (5.0L/6.0L)*t + (103.0L/720.0L)*t2); | 
| 2131 | 6 | 100 |  |  |  |  | } else if (x < 1.363) {  /* Winitzki 2003 section 3.5 */ | 
| 2132 | 2 |  |  |  |  |  | long double l1 = logl(1.0L+x); | 
| 2133 | 2 |  |  |  |  |  | return l1 * (1.0L - logl(1.0L+l1) / (2.0L+l1)); | 
| 2134 | 4 | 50 |  |  |  |  | } else if (x < 3.7) {    /* Modification of Vargas 2013 */ | 
| 2135 | 0 |  |  |  |  |  | long double l1 = logl(x); | 
| 2136 | 0 |  |  |  |  |  | long double l2 = logl(l1); | 
| 2137 | 0 |  |  |  |  |  | return l1 - l2 - logl(1.0L - l2/l1)/2.0L; | 
| 2138 |  |  |  |  |  |  | } else {                 /* Corless et al. 1993, page 22 */ | 
| 2139 | 4 |  |  |  |  |  | long double l1 = logl(x); | 
| 2140 | 4 |  |  |  |  |  | long double l2 = logl(l1); | 
| 2141 | 4 |  |  |  |  |  | long double d1 = 2.0L*l1*l1; | 
| 2142 | 4 |  |  |  |  |  | long double d2 = 3.0L*l1*d1; | 
| 2143 | 4 |  |  |  |  |  | long double d3 = 2.0L*l1*d2; | 
| 2144 | 4 |  |  |  |  |  | long double d4 = 5.0L*l1*d3; | 
| 2145 | 4 |  |  |  |  |  | long double w = l1 - l2 + l2/l1 + l2*(l2-2.0L)/d1; | 
| 2146 | 4 |  |  |  |  |  | w += l2*(6.0L+l2*(-9.0L+2.0L*l2))/d2; | 
| 2147 | 4 |  |  |  |  |  | w += l2*(-12.0L+l2*(36.0L+l2*(-22.0L+3.0L*l2)))/d3; | 
| 2148 | 4 |  |  |  |  |  | w += l2*(60.0L+l2*(-300.0L+l2*(350.0L+l2*(-125.0L+12.0L*l2))))/d4; | 
| 2149 | 4 |  |  |  |  |  | return w; | 
| 2150 |  |  |  |  |  |  | } | 
| 2151 |  |  |  |  |  |  | } | 
| 2152 |  |  |  |  |  |  |  | 
| 2153 | 9 |  |  |  |  |  | long double lambertw(long double x) { | 
| 2154 |  |  |  |  |  |  | long double w; | 
| 2155 |  |  |  |  |  |  | int i; | 
| 2156 |  |  |  |  |  |  |  | 
| 2157 | 9 | 50 |  |  |  |  | if (x < -0.36787944117145L) | 
| 2158 | 0 |  |  |  |  |  | croak("Invalid input to LambertW:  x must be >= -1/e"); | 
| 2159 | 9 | 100 |  |  |  |  | if (x == 0.0L) return 0.0L; | 
| 2160 |  |  |  |  |  |  |  | 
| 2161 |  |  |  |  |  |  | /* Estimate initial value */ | 
| 2162 | 8 |  |  |  |  |  | w = _lambertw_approx(x); | 
| 2163 |  |  |  |  |  |  | /* If input is too small, return .99999.... */ | 
| 2164 | 8 | 50 |  |  |  |  | if (w <= -1.0L) return -1.0L + 8*LDBL_EPSILON; | 
| 2165 |  |  |  |  |  |  | /* For very small inputs, don't iterate, return approx directly. */ | 
| 2166 | 8 | 100 |  |  |  |  | if (x < -0.36783) return w; | 
| 2167 |  |  |  |  |  |  |  | 
| 2168 |  |  |  |  |  |  | #if 0  /* Halley */ | 
| 2169 |  |  |  |  |  |  | lastw = w; | 
| 2170 |  |  |  |  |  |  | for (i = 0; i < 100; i++) { | 
| 2171 |  |  |  |  |  |  | long double ew = expl(w); | 
| 2172 |  |  |  |  |  |  | long double wew = w * ew; | 
| 2173 |  |  |  |  |  |  | long double wewx = wew - x; | 
| 2174 |  |  |  |  |  |  | long double w1 = w + 1; | 
| 2175 |  |  |  |  |  |  | w = w - wewx / (ew * w1 - (w+2) * wewx/(2*w1)); | 
| 2176 |  |  |  |  |  |  | if (w != 0.0L && fabsl((w-lastw)/w) <= 8*LDBL_EPSILON) break; | 
| 2177 |  |  |  |  |  |  | lastw = w; | 
| 2178 |  |  |  |  |  |  | } | 
| 2179 |  |  |  |  |  |  | #else  /* Fritsch, see Veberic 2009.  1-2 iterations are enough. */ | 
| 2180 | 30 | 100 |  |  |  |  | for (i = 0; i < 6 && w != 0.0L; i++) { | 
|  |  | 50 |  |  |  |  |  | 
| 2181 | 28 |  |  |  |  |  | long double w1 = 1 + w; | 
| 2182 | 28 |  |  |  |  |  | long double zn = logl(x/w) - w; | 
| 2183 | 28 |  |  |  |  |  | long double qn = 2 * w1 * (w1+(2.0L/3.0L)*zn); | 
| 2184 | 28 |  |  |  |  |  | long double en = (zn/w1) * (qn-zn)/(qn-2.0L*zn); | 
| 2185 |  |  |  |  |  |  | /* w *= 1.0L + en;  if (fabsl(en) <= 16*LDBL_EPSILON) break; */ | 
| 2186 | 28 |  |  |  |  |  | long double wen = w * en; | 
| 2187 | 28 |  |  |  |  |  | w += wen; | 
| 2188 | 28 | 100 |  |  |  |  | if (fabsl(wen) <= 64*LDBL_EPSILON) break; | 
| 2189 |  |  |  |  |  |  | } | 
| 2190 |  |  |  |  |  |  | #endif | 
| 2191 |  |  |  |  |  |  |  | 
| 2192 | 7 |  |  |  |  |  | return w; | 
| 2193 |  |  |  |  |  |  | } | 
| 2194 |  |  |  |  |  |  |  | 
| 2195 |  |  |  |  |  |  | #if HAVE_STD_U64 | 
| 2196 |  |  |  |  |  |  | #define U64T uint64_t | 
| 2197 |  |  |  |  |  |  | #else | 
| 2198 |  |  |  |  |  |  | #define U64T UV | 
| 2199 |  |  |  |  |  |  | #endif | 
| 2200 |  |  |  |  |  |  |  | 
| 2201 |  |  |  |  |  |  | /* Spigot from Arndt, Haenel, Winter, and Flammenkamp. */ | 
| 2202 |  |  |  |  |  |  | /* Modified for larger digits and rounding by Dana Jacobsen */ | 
| 2203 | 987 |  |  |  |  |  | char* pidigits(int digits) | 
| 2204 |  |  |  |  |  |  | { | 
| 2205 |  |  |  |  |  |  | char* out; | 
| 2206 |  |  |  |  |  |  | uint32_t *a, b, c, d, e, g, i, d4, d3, d2, d1; | 
| 2207 | 987 |  |  |  |  |  | uint32_t const f = 10000; | 
| 2208 |  |  |  |  |  |  | U64T d64;  /* 64-bit intermediate for 2*2*10000*b > 2^32 (~30k digits) */ | 
| 2209 |  |  |  |  |  |  |  | 
| 2210 | 987 | 50 |  |  |  |  | if (digits <= 0) return 0; | 
| 2211 | 987 | 100 |  |  |  |  | if (digits <= DBL_DIG && digits <= 18) { | 
|  |  | 50 |  |  |  |  |  | 
| 2212 | 15 |  |  |  |  |  | Newz(0, out, 19, char); | 
| 2213 | 15 |  |  |  |  |  | (void)sprintf(out, "%.*lf", (digits-1), 3.141592653589793238); | 
| 2214 | 15 |  |  |  |  |  | return out; | 
| 2215 |  |  |  |  |  |  | } | 
| 2216 | 972 |  |  |  |  |  | digits++;   /* For rounding */ | 
| 2217 | 972 |  |  |  |  |  | c = 14*(digits/4 + 2); | 
| 2218 | 972 |  |  |  |  |  | New(0, out, digits+5+1, char); | 
| 2219 | 972 |  |  |  |  |  | *out++ = '3';  /* We'll turn "31415..." into "3.1415..." */ | 
| 2220 | 972 | 50 |  |  |  |  | New(0, a, c, uint32_t); | 
| 2221 | 1776998 | 100 |  |  |  |  | for (b = 0; b < c; b++)  a[b] = 2000; | 
| 2222 |  |  |  |  |  |  |  | 
| 2223 | 972 |  |  |  |  |  | d = i = 0; | 
| 2224 | 126616 | 100 |  |  |  |  | while ((b = c -= 14) > 0 && i < (uint32_t)digits) { | 
|  |  | 100 |  |  |  |  |  | 
| 2225 | 125644 |  |  |  |  |  | d = e = d % f; | 
| 2226 | 125644 | 50 |  |  |  |  | if (b > 107000) {  /* Use 64-bit intermediate while necessary. */ | 
| 2227 | 0 | 0 |  |  |  |  | for (d64 = d; --b > 107000; ) { | 
| 2228 | 0 |  |  |  |  |  | g = (b << 1) - 1; | 
| 2229 | 0 |  |  |  |  |  | d64 = d64 * b  +  f * (U64T)a[b]; | 
| 2230 | 0 |  |  |  |  |  | a[b] = d64 % g; | 
| 2231 | 0 |  |  |  |  |  | d64 /= g; | 
| 2232 |  |  |  |  |  |  | } | 
| 2233 | 0 |  |  |  |  |  | d = d64; | 
| 2234 | 0 |  |  |  |  |  | b++; | 
| 2235 |  |  |  |  |  |  | } | 
| 2236 | 148467144 | 100 |  |  |  |  | while (--b > 0) { | 
| 2237 | 148341500 |  |  |  |  |  | g = (b << 1) - 1; | 
| 2238 | 148341500 |  |  |  |  |  | d = d * b  +  f * a[b]; | 
| 2239 | 148341500 |  |  |  |  |  | a[b] = d % g; | 
| 2240 | 148341500 |  |  |  |  |  | d /= g; | 
| 2241 |  |  |  |  |  |  | } | 
| 2242 |  |  |  |  |  |  | /* sprintf(out+i, "%04d", e+d/f);   i += 4; */ | 
| 2243 | 125644 |  |  |  |  |  | d4 = e + d/f; | 
| 2244 | 125644 | 50 |  |  |  |  | if (d4 > 9999) { | 
| 2245 | 0 |  |  |  |  |  | d4 -= 10000; | 
| 2246 | 0 |  |  |  |  |  | out[i-1]++; | 
| 2247 | 0 | 0 |  |  |  |  | for (b=i-1; out[b] == '0'+1; b--) { out[b]='0'; out[b-1]++; } | 
| 2248 |  |  |  |  |  |  | } | 
| 2249 | 125644 |  |  |  |  |  | d3 = d4/10;  d2 = d3/10;  d1 = d2/10; | 
| 2250 | 125644 |  |  |  |  |  | out[i++] = '0' + d1; | 
| 2251 | 125644 |  |  |  |  |  | out[i++] = '0' + d2-d1*10; | 
| 2252 | 125644 |  |  |  |  |  | out[i++] = '0' + d3-d2*10; | 
| 2253 | 125644 |  |  |  |  |  | out[i++] = '0' + d4-d3*10; | 
| 2254 |  |  |  |  |  |  | } | 
| 2255 | 972 |  |  |  |  |  | Safefree(a); | 
| 2256 | 972 | 100 |  |  |  |  | if (out[digits-1] >= '5') out[digits-2]++;  /* Round */ | 
| 2257 | 1042 | 100 |  |  |  |  | for (i = digits-2; out[i] == '9'+1; i--)    /* Keep rounding */ | 
| 2258 | 70 |  |  |  |  |  | { out[i] = '0';  out[i-1]++; } | 
| 2259 | 972 |  |  |  |  |  | digits--;  /* Undo the extra digit we used for rounding */ | 
| 2260 | 972 |  |  |  |  |  | out[digits] = '\0'; | 
| 2261 | 972 |  |  |  |  |  | *out-- = '.'; | 
| 2262 | 972 |  |  |  |  |  | return out; | 
| 2263 |  |  |  |  |  |  | } | 
| 2264 |  |  |  |  |  |  |  | 
| 2265 |  |  |  |  |  |  | /* 1. Perform signed integer validation on b/blen. | 
| 2266 |  |  |  |  |  |  | * 2. Compare to a/alen using min or max based on first arg. | 
| 2267 |  |  |  |  |  |  | * 3. Return 0 to select a, 1 to select b. | 
| 2268 |  |  |  |  |  |  | */ | 
| 2269 | 4 |  |  |  |  |  | int strnum_minmax(int min, char* a, STRLEN alen, char* b, STRLEN blen) | 
| 2270 |  |  |  |  |  |  | { | 
| 2271 |  |  |  |  |  |  | int aneg, bneg; | 
| 2272 |  |  |  |  |  |  | STRLEN i; | 
| 2273 |  |  |  |  |  |  | /* a is checked, process b */ | 
| 2274 | 4 | 50 |  |  |  |  | if (b == 0 || blen == 0) croak("Parameter must be a positive integer"); | 
|  |  | 50 |  |  |  |  |  | 
| 2275 | 4 |  |  |  |  |  | bneg = (b[0] == '-'); | 
| 2276 | 4 | 50 |  |  |  |  | if (b[0] == '-' || b[0] == '+') { b++; blen--; } | 
|  |  | 50 |  |  |  |  |  | 
| 2277 | 4 | 50 |  |  |  |  | while (blen > 0 && *b == '0') { b++; blen--; } | 
|  |  | 50 |  |  |  |  |  | 
| 2278 | 84 | 100 |  |  |  |  | for (i = 0; i < blen; i++) | 
| 2279 | 80 | 50 |  |  |  |  | if (!isDIGIT(b[i])) | 
| 2280 | 0 |  |  |  |  |  | break; | 
| 2281 | 4 | 50 |  |  |  |  | if (blen == 0 || i < blen) | 
|  |  | 50 |  |  |  |  |  | 
| 2282 | 0 |  |  |  |  |  | croak("Parameter must be a positive integer"); | 
| 2283 |  |  |  |  |  |  |  | 
| 2284 | 4 | 100 |  |  |  |  | if (a == 0) return 1; | 
| 2285 |  |  |  |  |  |  |  | 
| 2286 | 2 |  |  |  |  |  | aneg = (a[0] == '-'); | 
| 2287 | 2 | 50 |  |  |  |  | if (a[0] == '-' || a[0] == '+') { a++; alen--; } | 
|  |  | 50 |  |  |  |  |  | 
| 2288 | 2 | 50 |  |  |  |  | while (alen > 0 && *a == '0') { a++; alen--; } | 
|  |  | 50 |  |  |  |  |  | 
| 2289 |  |  |  |  |  |  |  | 
| 2290 | 2 | 50 |  |  |  |  | if (aneg != bneg)  return  min  ?  (bneg == 1)  :  (aneg == 1); | 
|  |  | 0 |  |  |  |  |  | 
| 2291 | 2 | 50 |  |  |  |  | if (aneg == 1)  min = !min; | 
| 2292 | 2 | 50 |  |  |  |  | if (alen != blen)  return  min  ?  (alen > blen) :  (blen > alen); | 
|  |  | 0 |  |  |  |  |  | 
| 2293 |  |  |  |  |  |  |  | 
| 2294 | 2 | 50 |  |  |  |  | for (i = 0; i < blen; i++) | 
| 2295 | 2 | 50 |  |  |  |  | if (a[i] != b[i]) | 
| 2296 | 2 | 100 |  |  |  |  | return  min  ?  (a[i] > b[i])  :  (b[i] > a[i]); | 
| 2297 | 0 |  |  |  |  |  | return 0; /* equal */ | 
| 2298 |  |  |  |  |  |  | } | 
| 2299 |  |  |  |  |  |  |  | 
| 2300 | 6 |  |  |  |  |  | int from_digit_string(UV* rn, const char* s, int base) | 
| 2301 |  |  |  |  |  |  | { | 
| 2302 | 6 |  |  |  |  |  | UV max, n = 0; | 
| 2303 |  |  |  |  |  |  | int i, len; | 
| 2304 |  |  |  |  |  |  |  | 
| 2305 |  |  |  |  |  |  | /* Skip leading -/+ and zeros */ | 
| 2306 | 6 | 50 |  |  |  |  | if (s[0] == '-' || s[0] == '+') s++; | 
|  |  | 50 |  |  |  |  |  | 
| 2307 | 6 | 50 |  |  |  |  | while (s[0] == '0') s++; | 
| 2308 |  |  |  |  |  |  |  | 
| 2309 | 6 |  |  |  |  |  | len = strlen(s); | 
| 2310 | 6 |  |  |  |  |  | max = (UV_MAX-base+1)/base; | 
| 2311 |  |  |  |  |  |  |  | 
| 2312 | 39 | 100 |  |  |  |  | for (i = 0, len = strlen(s); i < len; i++) { | 
| 2313 | 34 |  |  |  |  |  | const char c = s[i]; | 
| 2314 | 34 | 50 |  |  |  |  | int d = !isalnum(c) ? 255 : (c <= '9') ? c-'0' : (c <= 'Z') ? c-'A'+10 : c-'a'+10; | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 2315 | 34 | 50 |  |  |  |  | if (d >= base) croak("Invalid digit for base %d", base); | 
| 2316 | 34 | 100 |  |  |  |  | if (n > max) return 0;   /* Overflow */ | 
| 2317 | 33 |  |  |  |  |  | n = n * base + d; | 
| 2318 |  |  |  |  |  |  | } | 
| 2319 | 5 |  |  |  |  |  | *rn = n; | 
| 2320 | 5 |  |  |  |  |  | return 1; | 
| 2321 |  |  |  |  |  |  | } | 
| 2322 |  |  |  |  |  |  |  | 
| 2323 | 13 |  |  |  |  |  | int from_digit_to_UV(UV* rn, UV* r, int len, int base) | 
| 2324 |  |  |  |  |  |  | { | 
| 2325 | 13 |  |  |  |  |  | UV d, n = 0; | 
| 2326 |  |  |  |  |  |  | int i; | 
| 2327 | 13 | 50 |  |  |  |  | if (len < 0 || len > BITS_PER_WORD) | 
|  |  | 50 |  |  |  |  |  | 
| 2328 | 0 |  |  |  |  |  | return 0; | 
| 2329 | 76 | 100 |  |  |  |  | for (i = 0; i < len; i++) { | 
| 2330 | 63 |  |  |  |  |  | d = r[i]; | 
| 2331 | 63 | 50 |  |  |  |  | if (n > (UV_MAX-d)/base) break;  /* overflow */ | 
| 2332 | 63 |  |  |  |  |  | n = n * base + d; | 
| 2333 |  |  |  |  |  |  | } | 
| 2334 | 13 |  |  |  |  |  | *rn = n; | 
| 2335 | 13 |  |  |  |  |  | return (i >= len); | 
| 2336 |  |  |  |  |  |  | } | 
| 2337 |  |  |  |  |  |  |  | 
| 2338 |  |  |  |  |  |  |  | 
| 2339 | 0 |  |  |  |  |  | int from_digit_to_str(char** rstr, UV* r, int len, int base) | 
| 2340 |  |  |  |  |  |  | { | 
| 2341 |  |  |  |  |  |  | char *so, *s; | 
| 2342 |  |  |  |  |  |  | int i; | 
| 2343 |  |  |  |  |  |  |  | 
| 2344 | 0 | 0 |  |  |  |  | if (len < 0 || !(base == 2 || base == 10 || base == 16)) return 0; | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 2345 |  |  |  |  |  |  |  | 
| 2346 | 0 | 0 |  |  |  |  | if (r[0] >= (UV) base) return 0;  /* TODO: We don't apply extended carry */ | 
| 2347 |  |  |  |  |  |  |  | 
| 2348 | 0 |  |  |  |  |  | New(0, so, len + 3, char); | 
| 2349 | 0 |  |  |  |  |  | s = so; | 
| 2350 | 0 | 0 |  |  |  |  | if (base == 2 || base == 16) { | 
|  |  | 0 |  |  |  |  |  | 
| 2351 | 0 |  |  |  |  |  | *s++ = '0'; | 
| 2352 | 0 | 0 |  |  |  |  | *s++ = (base == 2) ? 'b' : 'x'; | 
| 2353 |  |  |  |  |  |  | } | 
| 2354 | 0 | 0 |  |  |  |  | for (i = 0; i < len; i++) { | 
| 2355 | 0 |  |  |  |  |  | UV d = r[i]; | 
| 2356 | 0 | 0 |  |  |  |  | s[i] = (d < 10) ? '0'+d : 'a'+d-10; | 
| 2357 |  |  |  |  |  |  | } | 
| 2358 | 0 |  |  |  |  |  | s[len] = '\0'; | 
| 2359 | 0 |  |  |  |  |  | *rstr = so; | 
| 2360 | 0 |  |  |  |  |  | return 1; | 
| 2361 |  |  |  |  |  |  | } | 
| 2362 |  |  |  |  |  |  |  | 
| 2363 | 17 |  |  |  |  |  | int to_digit_array(int* bits, UV n, int base, int length) | 
| 2364 |  |  |  |  |  |  | { | 
| 2365 |  |  |  |  |  |  | int d; | 
| 2366 |  |  |  |  |  |  |  | 
| 2367 | 17 | 50 |  |  |  |  | if (base < 2 || length > 128) return -1; | 
|  |  | 50 |  |  |  |  |  | 
| 2368 |  |  |  |  |  |  |  | 
| 2369 | 17 | 100 |  |  |  |  | if (base == 2) { | 
| 2370 | 87 | 100 |  |  |  |  | for (d = 0; n; n >>= 1) | 
| 2371 | 77 |  |  |  |  |  | bits[d++] = n & 1; | 
| 2372 |  |  |  |  |  |  | } else { | 
| 2373 | 31 | 100 |  |  |  |  | for (d = 0; n; n /= base) | 
| 2374 | 24 |  |  |  |  |  | bits[d++] = n % base; | 
| 2375 |  |  |  |  |  |  | } | 
| 2376 | 17 | 100 |  |  |  |  | if (length < 0) length = d; | 
| 2377 | 43 | 100 |  |  |  |  | while (d < length) | 
| 2378 | 26 |  |  |  |  |  | bits[d++] = 0; | 
| 2379 | 17 |  |  |  |  |  | return length; | 
| 2380 |  |  |  |  |  |  | } | 
| 2381 |  |  |  |  |  |  |  | 
| 2382 | 3 |  |  |  |  |  | int to_digit_string(char* s, UV n, int base, int length) | 
| 2383 |  |  |  |  |  |  | { | 
| 2384 |  |  |  |  |  |  | int digits[128]; | 
| 2385 | 3 |  |  |  |  |  | int i, len = to_digit_array(digits, n, base, length); | 
| 2386 |  |  |  |  |  |  |  | 
| 2387 | 3 | 50 |  |  |  |  | if (len < 0) return -1; | 
| 2388 | 3 | 50 |  |  |  |  | if (base > 36) croak("invalid base for string: %d", base); | 
| 2389 |  |  |  |  |  |  |  | 
| 2390 | 21 | 100 |  |  |  |  | for (i = 0; i < len; i++) { | 
| 2391 | 18 |  |  |  |  |  | int dig = digits[len-i-1]; | 
| 2392 | 18 | 50 |  |  |  |  | s[i] = (dig < 10) ? '0'+dig : 'a'+dig-10; | 
| 2393 |  |  |  |  |  |  | } | 
| 2394 | 3 |  |  |  |  |  | s[len] = '\0'; | 
| 2395 | 3 |  |  |  |  |  | return len; | 
| 2396 |  |  |  |  |  |  | } | 
| 2397 |  |  |  |  |  |  |  | 
| 2398 | 1 |  |  |  |  |  | int to_string_128(char str[40], IV hi, UV lo) | 
| 2399 |  |  |  |  |  |  | { | 
| 2400 | 1 |  |  |  |  |  | int i, slen = 0, isneg = 0; | 
| 2401 |  |  |  |  |  |  |  | 
| 2402 | 1 | 50 |  |  |  |  | if (hi < 0) { | 
| 2403 | 0 |  |  |  |  |  | isneg = 1; | 
| 2404 | 0 |  |  |  |  |  | hi = -(hi+1); | 
| 2405 | 0 |  |  |  |  |  | lo = UV_MAX - lo + 1; | 
| 2406 |  |  |  |  |  |  | } | 
| 2407 |  |  |  |  |  |  | #if BITS_PER_WORD == 64 && HAVE_UINT128 | 
| 2408 |  |  |  |  |  |  | { | 
| 2409 | 1 |  |  |  |  |  | uint128_t dd, sum = (((uint128_t) hi) << 64) + lo; | 
| 2410 |  |  |  |  |  |  | do { | 
| 2411 | 20 |  |  |  |  |  | dd = sum / 10; | 
| 2412 | 20 |  |  |  |  |  | str[slen++] = '0' + (sum - dd*10); | 
| 2413 | 20 |  |  |  |  |  | sum = dd; | 
| 2414 | 20 | 100 |  |  |  |  | } while (sum); | 
| 2415 |  |  |  |  |  |  | } | 
| 2416 |  |  |  |  |  |  | #else | 
| 2417 |  |  |  |  |  |  | { | 
| 2418 |  |  |  |  |  |  | UV d, r; | 
| 2419 |  |  |  |  |  |  | uint32_t a[4]; | 
| 2420 |  |  |  |  |  |  | a[0] = hi >> (BITS_PER_WORD/2); | 
| 2421 |  |  |  |  |  |  | a[1] = hi & (UV_MAX >> (BITS_PER_WORD/2)); | 
| 2422 |  |  |  |  |  |  | a[2] = lo >> (BITS_PER_WORD/2); | 
| 2423 |  |  |  |  |  |  | a[3] = lo & (UV_MAX >> (BITS_PER_WORD/2)); | 
| 2424 |  |  |  |  |  |  | do { | 
| 2425 |  |  |  |  |  |  | r = a[0]; | 
| 2426 |  |  |  |  |  |  | d = r/10;  r = ((r-d*10) << (BITS_PER_WORD/2)) + a[1];  a[0] = d; | 
| 2427 |  |  |  |  |  |  | d = r/10;  r = ((r-d*10) << (BITS_PER_WORD/2)) + a[2];  a[1] = d; | 
| 2428 |  |  |  |  |  |  | d = r/10;  r = ((r-d*10) << (BITS_PER_WORD/2)) + a[3];  a[2] = d; | 
| 2429 |  |  |  |  |  |  | d = r/10;  r = r-d*10;  a[3] = d; | 
| 2430 |  |  |  |  |  |  | str[slen++] = '0'+(r%10); | 
| 2431 |  |  |  |  |  |  | } while (a[0] || a[1] || a[2] || a[3]); | 
| 2432 |  |  |  |  |  |  | } | 
| 2433 |  |  |  |  |  |  | #endif | 
| 2434 |  |  |  |  |  |  | /* Reverse the order */ | 
| 2435 | 11 | 100 |  |  |  |  | for (i=0; i < slen/2; i++) { | 
| 2436 | 10 |  |  |  |  |  | char t=str[i]; | 
| 2437 | 10 |  |  |  |  |  | str[i]=str[slen-i-1]; | 
| 2438 | 10 |  |  |  |  |  | str[slen-i-1] = t; | 
| 2439 |  |  |  |  |  |  | } | 
| 2440 |  |  |  |  |  |  | /* Prepend a negative sign if needed */ | 
| 2441 | 1 | 50 |  |  |  |  | if (isneg) { | 
| 2442 | 0 | 0 |  |  |  |  | for (i = slen; i > 0; i--) | 
| 2443 | 0 |  |  |  |  |  | str[i] = str[i-1]; | 
| 2444 | 0 |  |  |  |  |  | str[0] = '-'; | 
| 2445 | 0 |  |  |  |  |  | slen++; | 
| 2446 |  |  |  |  |  |  | } | 
| 2447 |  |  |  |  |  |  | /* Add terminator */ | 
| 2448 | 1 |  |  |  |  |  | str[slen] = '\0'; | 
| 2449 | 1 |  |  |  |  |  | return slen; | 
| 2450 |  |  |  |  |  |  | } | 
| 2451 |  |  |  |  |  |  |  | 
| 2452 |  |  |  |  |  |  | /* Oddball primality test. | 
| 2453 |  |  |  |  |  |  | * In this file rather than primality.c because it uses factoring (!). | 
| 2454 |  |  |  |  |  |  | * Algorithm from Charles R Greathouse IV, 2015 */ | 
| 2455 | 472642 |  |  |  |  |  | static INLINE uint32_t _catalan_v32(uint32_t n, uint32_t p) { | 
| 2456 | 472642 |  |  |  |  |  | uint32_t s = 0; | 
| 2457 | 946183 | 100 |  |  |  |  | while (n /= p)  s += n % 2; | 
| 2458 | 472642 |  |  |  |  |  | return s; | 
| 2459 |  |  |  |  |  |  | } | 
| 2460 | 0 |  |  |  |  |  | static INLINE uint32_t _catalan_v(UV n, UV p) { | 
| 2461 | 0 |  |  |  |  |  | uint32_t s = 0; | 
| 2462 | 0 | 0 |  |  |  |  | while (n /= p)  s += n % 2; | 
| 2463 | 0 |  |  |  |  |  | return s; | 
| 2464 |  |  |  |  |  |  | } | 
| 2465 | 901451 |  |  |  |  |  | static UV _catalan_mult(UV m, UV p, UV n, UV a) { | 
| 2466 | 901451 | 100 |  |  |  |  | if (p > a) { | 
| 2467 | 428809 |  |  |  |  |  | m = mulmod(m, p, n); | 
| 2468 |  |  |  |  |  |  | } else { | 
| 2469 | 472642 | 50 |  |  |  |  | UV pow = (n <= 4294967295UL) ? _catalan_v32(a<<1,p) : _catalan_v(a<<1,p); | 
| 2470 | 472642 |  |  |  |  |  | m = (pow == 0) ? m | 
| 2471 | 658853 | 100 |  |  |  |  | : (pow == 1) ? mulmod(m,p,n) | 
| 2472 | 186211 | 100 |  |  |  |  | : mulmod(m,powmod(p,pow,n),n); | 
| 2473 |  |  |  |  |  |  | } | 
| 2474 | 901451 |  |  |  |  |  | return m; | 
| 2475 |  |  |  |  |  |  | } | 
| 2476 | 5 |  |  |  |  |  | static int _catalan_vtest(UV n, UV p) { | 
| 2477 | 18 | 100 |  |  |  |  | while (n /= p) | 
| 2478 | 13 | 50 |  |  |  |  | if (n % 2) | 
| 2479 | 0 |  |  |  |  |  | return 1; | 
| 2480 | 5 |  |  |  |  |  | return 0; | 
| 2481 |  |  |  |  |  |  | } | 
| 2482 | 3 |  |  |  |  |  | int is_catalan_pseudoprime(UV n) { | 
| 2483 |  |  |  |  |  |  | UV m, a; | 
| 2484 |  |  |  |  |  |  | int i; | 
| 2485 |  |  |  |  |  |  |  | 
| 2486 | 3 | 50 |  |  |  |  | if (n < 2 || ((n % 2) == 0 && n != 2)) return 0; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 2487 | 3 | 50 |  |  |  |  | if (is_prob_prime(n)) return 1; | 
| 2488 |  |  |  |  |  |  |  | 
| 2489 | 3 |  |  |  |  |  | m = 1; | 
| 2490 | 3 |  |  |  |  |  | a = n >> 1; | 
| 2491 |  |  |  |  |  |  | { | 
| 2492 |  |  |  |  |  |  | UV factors[MPU_MAX_FACTORS+1]; | 
| 2493 | 3 |  |  |  |  |  | int nfactors = factor_exp(n, factors, 0); | 
| 2494 |  |  |  |  |  |  | /* Aebi and Cairns 2008, page 9 */ | 
| 2495 |  |  |  |  |  |  | #if BITS_PER_WORD == 32 | 
| 2496 |  |  |  |  |  |  | if (nfactors == 2) | 
| 2497 |  |  |  |  |  |  | #else | 
| 2498 | 3 | 50 |  |  |  |  | if (nfactors == 2 && (n < UVCONST(10000000000))) | 
|  |  | 0 |  |  |  |  |  | 
| 2499 |  |  |  |  |  |  | #endif | 
| 2500 | 0 |  |  |  |  |  | return 0; | 
| 2501 | 8 | 100 |  |  |  |  | for (i = 0; i < nfactors; i++) { | 
| 2502 | 5 | 50 |  |  |  |  | if (_catalan_vtest(a << 1, factors[i])) | 
| 2503 | 0 |  |  |  |  |  | return 0; | 
| 2504 |  |  |  |  |  |  | } | 
| 2505 |  |  |  |  |  |  | } | 
| 2506 |  |  |  |  |  |  | { | 
| 2507 |  |  |  |  |  |  | UV seg_base, seg_low, seg_high; | 
| 2508 |  |  |  |  |  |  | unsigned char* segment; | 
| 2509 |  |  |  |  |  |  | void* ctx; | 
| 2510 | 3 |  |  |  |  |  | m = _catalan_mult(m, 2, n, a); | 
| 2511 | 3 |  |  |  |  |  | m = _catalan_mult(m, 3, n, a); | 
| 2512 | 3 |  |  |  |  |  | m = _catalan_mult(m, 5, n, a); | 
| 2513 | 3 |  |  |  |  |  | ctx = start_segment_primes(7, n, &segment); | 
| 2514 | 19 | 100 |  |  |  |  | while (next_segment_primes(ctx, &seg_base, &seg_low, &seg_high)) { | 
| 2515 | 957825 | 50 |  |  |  |  | START_DO_FOR_EACH_SIEVE_PRIME( segment, seg_base, seg_low, seg_high ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 2516 | 901442 |  |  |  |  |  | m = _catalan_mult(m, p, n, a); | 
| 2517 | 56367 |  |  |  |  |  | } END_DO_FOR_EACH_SIEVE_PRIME | 
| 2518 |  |  |  |  |  |  | } | 
| 2519 | 3 |  |  |  |  |  | end_segment_primes(ctx); | 
| 2520 |  |  |  |  |  |  | } | 
| 2521 | 3 | 100 |  |  |  |  | return (a & 1) ? (m==(n-1)) : (m==1); | 
| 2522 |  |  |  |  |  |  | } | 
| 2523 |  |  |  |  |  |  |  | 
| 2524 |  |  |  |  |  |  | /* If we have fast CTZ, use this GCD.  See Brent Alg V and FLINT Abhinav Baid */ | 
| 2525 | 35868 |  |  |  |  |  | UV gcdz(UV x, UV y) { | 
| 2526 |  |  |  |  |  |  | UV f, x2, y2; | 
| 2527 |  |  |  |  |  |  |  | 
| 2528 | 35868 | 100 |  |  |  |  | if (x == 0) return y; | 
| 2529 |  |  |  |  |  |  |  | 
| 2530 | 35858 | 100 |  |  |  |  | if (y & 1) {  /* Optimize y odd */ | 
| 2531 | 20064 | 50 |  |  |  |  | x >>= ctz(x); | 
| 2532 | 402166 | 100 |  |  |  |  | while (x != y) { | 
| 2533 | 382102 | 100 |  |  |  |  | if (x < y) { y -= x; y >>= ctz(y); } | 
|  |  | 50 |  |  |  |  |  | 
| 2534 | 324633 | 50 |  |  |  |  | else       { x -= y; x >>= ctz(x); } | 
| 2535 |  |  |  |  |  |  | } | 
| 2536 | 20064 |  |  |  |  |  | return x; | 
| 2537 |  |  |  |  |  |  | } | 
| 2538 |  |  |  |  |  |  |  | 
| 2539 | 15794 | 100 |  |  |  |  | if (y == 0) return x; | 
| 2540 |  |  |  |  |  |  |  | 
| 2541 |  |  |  |  |  |  | /* Alternately:  f = ctz(x|y); x >>= ctz(x); y >>= ctz(y); */ | 
| 2542 | 15793 | 50 |  |  |  |  | x2 = ctz(x); | 
| 2543 | 15793 | 50 |  |  |  |  | y2 = ctz(y); | 
| 2544 | 15793 |  |  |  |  |  | f = (x2 <= y2) ? x2 : y2; | 
| 2545 | 15793 |  |  |  |  |  | x >>= x2; | 
| 2546 | 15793 |  |  |  |  |  | y >>= y2; | 
| 2547 |  |  |  |  |  |  |  | 
| 2548 | 203227 | 100 |  |  |  |  | while (x != y) { | 
| 2549 | 187434 | 100 |  |  |  |  | if (x < y) { | 
| 2550 | 14545 |  |  |  |  |  | y -= x; | 
| 2551 | 14545 | 50 |  |  |  |  | y >>= ctz(y); | 
| 2552 |  |  |  |  |  |  | } else { | 
| 2553 | 172889 |  |  |  |  |  | x -= y; | 
| 2554 | 172889 | 50 |  |  |  |  | x >>= ctz(x); | 
| 2555 |  |  |  |  |  |  | } | 
| 2556 |  |  |  |  |  |  | } | 
| 2557 | 15793 |  |  |  |  |  | return x << f; | 
| 2558 |  |  |  |  |  |  | } | 
| 2559 |  |  |  |  |  |  |  | 
| 2560 |  |  |  |  |  |  | /* The intermediate values are so large that we can only stay in 64-bit | 
| 2561 |  |  |  |  |  |  | * up to 53 or so using the divisor_sum calculations.  So just use a table. | 
| 2562 |  |  |  |  |  |  | * Save space by just storing the 32-bit values. */ | 
| 2563 |  |  |  |  |  |  | static const int32_t tau_table[] = { | 
| 2564 |  |  |  |  |  |  | 0,1,-24,252,-1472,4830,-6048,-16744,84480,-113643,-115920,534612,-370944,-577738,401856,1217160,987136,-6905934,2727432,10661420,-7109760,-4219488,-12830688,18643272,21288960,-25499225,13865712,-73279080,24647168,128406630,-29211840,-52843168,-196706304,134722224,165742416,-80873520,167282496,-182213314,-255874080,-145589976,408038400,308120442,101267712,-17125708,-786948864,-548895690,-447438528 | 
| 2565 |  |  |  |  |  |  | }; | 
| 2566 |  |  |  |  |  |  | #define NTAU (sizeof(tau_table)/sizeof(tau_table[0])) | 
| 2567 | 10 |  |  |  |  |  | IV ramanujan_tau(UV n) { | 
| 2568 | 10 | 100 |  |  |  |  | return (n < NTAU)  ?  tau_table[n]  :  0; | 
| 2569 |  |  |  |  |  |  | } | 
| 2570 |  |  |  |  |  |  |  | 
| 2571 | 405 |  |  |  |  |  | static UV _count_class_div(UV s, UV b2) { | 
| 2572 | 405 |  |  |  |  |  | UV h = 0, i, ndivisors, *divs, lim; | 
| 2573 |  |  |  |  |  |  |  | 
| 2574 | 405 |  |  |  |  |  | lim = isqrt(b2); | 
| 2575 | 405 | 100 |  |  |  |  | if (lim*lim == b2) lim--; | 
| 2576 | 405 | 100 |  |  |  |  | if (s > lim) return 0; | 
| 2577 |  |  |  |  |  |  |  | 
| 2578 | 362 | 100 |  |  |  |  | if ((lim-s) < 70) {  /* Iterate looking for divisors */ | 
| 2579 | 7104 | 100 |  |  |  |  | for (i = s; i <= lim; i++) | 
| 2580 | 6767 | 100 |  |  |  |  | if (b2 % i == 0) | 
| 2581 | 106 |  |  |  |  |  | h++; | 
| 2582 |  |  |  |  |  |  | } else {             /* Walk through all the divisors */ | 
| 2583 | 25 |  |  |  |  |  | divs = _divisor_list(b2, &ndivisors); | 
| 2584 | 56 | 50 |  |  |  |  | for (i = 0; i < ndivisors && divs[i] <= lim; i++) | 
|  |  | 100 |  |  |  |  |  | 
| 2585 | 31 | 100 |  |  |  |  | if (divs[i] >= s) | 
| 2586 | 6 |  |  |  |  |  | h++; | 
| 2587 | 25 |  |  |  |  |  | Safefree(divs); | 
| 2588 |  |  |  |  |  |  | } | 
| 2589 | 405 |  |  |  |  |  | return h; | 
| 2590 |  |  |  |  |  |  | } | 
| 2591 |  |  |  |  |  |  |  | 
| 2592 |  |  |  |  |  |  | /* Returns 12 * H(n).  See Cohen 5.3.5 or Pari/GP. | 
| 2593 |  |  |  |  |  |  | * Pari/GP uses a different method for n > 500000, which is quite a bit | 
| 2594 |  |  |  |  |  |  | * faster, but assumes the GRH. */ | 
| 2595 | 96 |  |  |  |  |  | IV hclassno(UV n) { | 
| 2596 | 96 |  |  |  |  |  | UV nmod4 = n % 4, b2, b, h; | 
| 2597 |  |  |  |  |  |  | int square; | 
| 2598 |  |  |  |  |  |  |  | 
| 2599 | 96 | 100 |  |  |  |  | if (n == 0) return -1; | 
| 2600 | 95 | 100 |  |  |  |  | if (nmod4 == 1 || nmod4 == 2) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 2601 | 74 | 100 |  |  |  |  | if (n == 3) return 4; | 
| 2602 |  |  |  |  |  |  |  | 
| 2603 | 73 |  |  |  |  |  | b = n & 1; | 
| 2604 | 73 |  |  |  |  |  | b2 = (n+1) >> 2; | 
| 2605 | 73 |  |  |  |  |  | square = is_perfect_square(b2); | 
| 2606 |  |  |  |  |  |  |  | 
| 2607 | 73 |  |  |  |  |  | h = divisor_sum(b2,0) >> 1; | 
| 2608 | 73 | 100 |  |  |  |  | if (b == 1) | 
| 2609 | 18 |  |  |  |  |  | h = 1 + square + ((h - 1) << 1); | 
| 2610 | 73 |  |  |  |  |  | b += 2; | 
| 2611 |  |  |  |  |  |  |  | 
| 2612 | 478 | 100 |  |  |  |  | for (;  b2 = (n + b*b) >> 2, 3*b2 < n;  b += 2) { | 
| 2613 | 810 |  |  |  |  |  | h += (b2 % b == 0) | 
| 2614 | 405 |  |  |  |  |  | +  is_perfect_square(b2) | 
| 2615 | 405 |  |  |  |  |  | +  (_count_class_div(b+1, b2) << 1); | 
| 2616 |  |  |  |  |  |  | } | 
| 2617 | 73 | 100 |  |  |  |  | return 12*h + ((b2*3 == n) ? 4 : square && !(n&1) ? 6 : 0); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 2618 |  |  |  |  |  |  | } | 
| 2619 |  |  |  |  |  |  |  | 
| 2620 | 25300 |  |  |  |  |  | UV polygonal_root(UV n, UV k, int* overflow) { | 
| 2621 |  |  |  |  |  |  | UV D, R; | 
| 2622 | 25300 | 50 |  |  |  |  | MPUassert(k >= 3, "is_polygonal root < 3"); | 
| 2623 | 25300 |  |  |  |  |  | *overflow = 0; | 
| 2624 | 25300 | 100 |  |  |  |  | if (n <= 1) return n; | 
| 2625 | 25254 | 100 |  |  |  |  | if (k == 4) return is_perfect_square(n) ? isqrt(n) : 0; | 
|  |  | 100 |  |  |  |  |  | 
| 2626 | 25056 | 100 |  |  |  |  | if (k == 3) { | 
| 2627 | 108 | 50 |  |  |  |  | if (n >= UV_MAX/8) *overflow = 1; | 
| 2628 | 108 |  |  |  |  |  | D = n << 3; | 
| 2629 | 108 |  |  |  |  |  | R = 1; | 
| 2630 |  |  |  |  |  |  | } else { | 
| 2631 | 24948 | 50 |  |  |  |  | if (k > UV_MAX/k || n > UV_MAX/(8*k-16)) *overflow = 1; | 
|  |  | 50 |  |  |  |  |  | 
| 2632 | 24948 |  |  |  |  |  | D = (8*k-16) * n; | 
| 2633 | 24948 |  |  |  |  |  | R = (k-4) * (k-4); | 
| 2634 |  |  |  |  |  |  | } | 
| 2635 | 25056 | 50 |  |  |  |  | if (D+R <= D) *overflow = 1; | 
| 2636 | 25056 |  |  |  |  |  | D += R; | 
| 2637 | 25056 | 50 |  |  |  |  | if (*overflow || !is_perfect_square(D)) return 0; | 
|  |  | 100 |  |  |  |  |  | 
| 2638 | 918 |  |  |  |  |  | D = isqrt(D) + (k-4); | 
| 2639 | 918 |  |  |  |  |  | R = 2*k - 4; | 
| 2640 | 918 | 100 |  |  |  |  | if ((D % R) != 0) return 0; | 
| 2641 | 396 |  |  |  |  |  | return D/R; | 
| 2642 |  |  |  |  |  |  | } | 
| 2643 |  |  |  |  |  |  |  | 
| 2644 |  |  |  |  |  |  | /* These rank/unrank are O(n^2) algorithms using O(n) in-place space. | 
| 2645 |  |  |  |  |  |  | * Bonet 2008 gives O(n log n) algorithms using a bit more space. | 
| 2646 |  |  |  |  |  |  | */ | 
| 2647 |  |  |  |  |  |  |  | 
| 2648 | 5 |  |  |  |  |  | int num_to_perm(UV k, int n, int *vec) { | 
| 2649 | 5 |  |  |  |  |  | int i, j, t, si = 0; | 
| 2650 | 5 |  |  |  |  |  | UV f = factorial(n-1); | 
| 2651 |  |  |  |  |  |  |  | 
| 2652 | 8 | 100 |  |  |  |  | while (f == 0) /* We can handle n! overflow if we have a valid k */ | 
| 2653 | 3 |  |  |  |  |  | f = factorial(n - 1 - ++si); | 
| 2654 |  |  |  |  |  |  |  | 
| 2655 | 5 | 100 |  |  |  |  | if (k/f >= (UV)n) | 
| 2656 | 1 |  |  |  |  |  | k %= f*n; | 
| 2657 |  |  |  |  |  |  |  | 
| 2658 | 50 | 100 |  |  |  |  | for (i = 0; i < n; i++) | 
| 2659 | 45 |  |  |  |  |  | vec[i] = i; | 
| 2660 | 42 | 100 |  |  |  |  | for (i = si; i < n-1; i++) { | 
| 2661 | 37 |  |  |  |  |  | UV p = k/f; | 
| 2662 | 37 |  |  |  |  |  | k -= p*f; | 
| 2663 | 37 |  |  |  |  |  | f /= n-i-1; | 
| 2664 | 37 | 100 |  |  |  |  | if (p > 0) { | 
| 2665 | 66 | 100 |  |  |  |  | for (j = i+p, t = vec[j]; j > i; j--) | 
| 2666 | 47 |  |  |  |  |  | vec[j] = vec[j-1]; | 
| 2667 | 19 |  |  |  |  |  | vec[i] = t; | 
| 2668 |  |  |  |  |  |  | } | 
| 2669 |  |  |  |  |  |  | } | 
| 2670 | 5 |  |  |  |  |  | return 1; | 
| 2671 |  |  |  |  |  |  | } | 
| 2672 |  |  |  |  |  |  |  | 
| 2673 | 6 |  |  |  |  |  | int perm_to_num(int n, int *vec, UV *rank) { | 
| 2674 |  |  |  |  |  |  | int i, j, k; | 
| 2675 | 6 |  |  |  |  |  | UV f, num = 0; | 
| 2676 | 6 |  |  |  |  |  | f = factorial(n-1); | 
| 2677 | 6 | 100 |  |  |  |  | if (f == 0) return 0; | 
| 2678 | 42 | 100 |  |  |  |  | for (i = 0; i < n-1; i++) { | 
| 2679 | 340 | 100 |  |  |  |  | for (j = i+1, k = 0; j < n; j++) | 
| 2680 | 302 | 100 |  |  |  |  | if (vec[j] < vec[i]) | 
| 2681 | 137 |  |  |  |  |  | k++; | 
| 2682 | 38 | 50 |  |  |  |  | if ((UV)k > (UV_MAX-num)/f) return 0;  /* overflow */ | 
| 2683 | 38 |  |  |  |  |  | num += k*f; | 
| 2684 | 38 |  |  |  |  |  | f /= n-i-1; | 
| 2685 |  |  |  |  |  |  | } | 
| 2686 | 4 |  |  |  |  |  | *rank = num; | 
| 2687 | 4 |  |  |  |  |  | return 1; | 
| 2688 |  |  |  |  |  |  | } | 
| 2689 |  |  |  |  |  |  |  | 
| 2690 | 0 |  |  |  |  |  | static int numcmp(const void *a, const void *b) | 
| 2691 | 0 | 0 |  |  |  |  | { const UV *x = a, *y = b; return (*x > *y) ? 1 : (*x < *y) ? -1 : 0; } | 
|  |  | 0 |  |  |  |  |  | 
| 2692 |  |  |  |  |  |  |  | 
| 2693 |  |  |  |  |  |  | /* | 
| 2694 |  |  |  |  |  |  | * For k | 
| 2695 |  |  |  |  |  |  | *    https://www.math.upenn.edu/~wilf/website/CombinatorialAlgorithms.pdf | 
| 2696 |  |  |  |  |  |  | * Note it requires an O(k) complete shuffle as the results are sorted. | 
| 2697 |  |  |  |  |  |  | * | 
| 2698 |  |  |  |  |  |  | * This seems to be 4-100x faster than NumPy's random.{permutation,choice} | 
| 2699 |  |  |  |  |  |  | * for n under 100k or so.  It's even faster with larger n.  For example | 
| 2700 |  |  |  |  |  |  | *   from numpy.random import choice;  choice(100000000, 4, replace=False) | 
| 2701 |  |  |  |  |  |  | * uses 774MB and takes 55 seconds.  We take less than 1 microsecond. | 
| 2702 |  |  |  |  |  |  | */ | 
| 2703 | 3 |  |  |  |  |  | void randperm(void* ctx, UV n, UV k, UV *S) { | 
| 2704 |  |  |  |  |  |  | UV i, j; | 
| 2705 |  |  |  |  |  |  |  | 
| 2706 | 3 | 50 |  |  |  |  | if (k > n)  k = n; | 
| 2707 |  |  |  |  |  |  |  | 
| 2708 | 3 | 50 |  |  |  |  | if        (k == 0) {                  /* 0 of n */ | 
| 2709 | 3 | 100 |  |  |  |  | } else if (k == 1) {                  /* 1 of n.  Pick one at random */ | 
| 2710 | 1 |  |  |  |  |  | S[0] = urandomm64(ctx,n); | 
| 2711 | 2 | 50 |  |  |  |  | } else if (k == 2 && n == 2) {        /* 2 of 2.  Flip a coin */ | 
|  |  | 0 |  |  |  |  |  | 
| 2712 | 0 |  |  |  |  |  | S[0] = urandomb(ctx,1); | 
| 2713 | 0 |  |  |  |  |  | S[1] = 1-S[0]; | 
| 2714 | 2 | 50 |  |  |  |  | } else if (k == 2) {                  /* 2 of n.  Pick 2 skipping dup */ | 
| 2715 | 0 |  |  |  |  |  | S[0] = urandomm64(ctx,n); | 
| 2716 | 0 |  |  |  |  |  | S[1] = urandomm64(ctx,n-1); | 
| 2717 | 0 | 0 |  |  |  |  | if (S[1] >= S[0]) S[1]++; | 
| 2718 | 2 | 50 |  |  |  |  | } else if (k < n/100 && k < 30) {     /* k of n.  Pick k with loop */ | 
|  |  | 0 |  |  |  |  |  | 
| 2719 | 0 | 0 |  |  |  |  | for (i = 0; i < k; i++) { | 
| 2720 |  |  |  |  |  |  | do { | 
| 2721 | 0 |  |  |  |  |  | S[i] = urandomm64(ctx,n); | 
| 2722 | 0 | 0 |  |  |  |  | for (j = 0; j < i; j++) | 
| 2723 | 0 | 0 |  |  |  |  | if (S[j] == S[i]) | 
| 2724 | 0 |  |  |  |  |  | break; | 
| 2725 | 0 | 0 |  |  |  |  | } while (j < i); | 
| 2726 |  |  |  |  |  |  | } | 
| 2727 | 2 | 50 |  |  |  |  | } else if (k < n/100 && n > 1000000) {/* k of n.  Pick k with dedup retry */ | 
|  |  | 0 |  |  |  |  |  | 
| 2728 | 0 | 0 |  |  |  |  | for (j = 0; j < k; ) { | 
| 2729 | 0 | 0 |  |  |  |  | for (i = j; i < k; i++) /* Fill S[j .. k-1] then sort S */ | 
| 2730 | 0 |  |  |  |  |  | S[i] = urandomm64(ctx,n); | 
| 2731 | 0 |  |  |  |  |  | qsort(S, k, sizeof(UV), numcmp); | 
| 2732 | 0 | 0 |  |  |  |  | for (j = 0, i = 1; i < k; i++)  /* Find and remove dups.  O(n). */ | 
| 2733 | 0 | 0 |  |  |  |  | if (S[j] != S[i]) | 
| 2734 | 0 |  |  |  |  |  | S[++j] = S[i]; | 
| 2735 | 0 |  |  |  |  |  | j++; | 
| 2736 |  |  |  |  |  |  | } | 
| 2737 |  |  |  |  |  |  | /* S is sorted unique k-selection of 0 to n-1.  Shuffle. */ | 
| 2738 | 0 | 0 |  |  |  |  | for (i = 0; i < k; i++) { | 
| 2739 | 0 |  |  |  |  |  | j = urandomm64(ctx,k-i); | 
| 2740 | 0 |  |  |  |  |  | { UV t = S[i]; S[i] = S[i+j]; S[i+j] = t; } | 
| 2741 |  |  |  |  |  |  | } | 
| 2742 | 2 | 100 |  |  |  |  | } else if (k < n/4) {                 /* k of n.  Pick k with mask */ | 
| 2743 | 1 |  |  |  |  |  | uint32_t *mask, smask[8] = {0}; | 
| 2744 | 1 | 50 |  |  |  |  | if (n <= 32*8) mask = smask; | 
| 2745 | 0 | 0 |  |  |  |  | else           Newz(0, mask, n/32 + ((n%32)?1:0), uint32_t); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 2746 | 5 | 100 |  |  |  |  | for (i = 0; i < k; i++) { | 
| 2747 |  |  |  |  |  |  | do { | 
| 2748 | 4 |  |  |  |  |  | j = urandomm64(ctx,n); | 
| 2749 | 4 | 50 |  |  |  |  | } while ( mask[j>>5] & (1U << (j&0x1F)) ); | 
| 2750 | 4 |  |  |  |  |  | S[i] = j; | 
| 2751 | 4 |  |  |  |  |  | mask[j>>5] |= (1U << (j&0x1F)); | 
| 2752 |  |  |  |  |  |  | } | 
| 2753 | 1 | 50 |  |  |  |  | if (mask != smask) Safefree(mask); | 
| 2754 | 1 | 50 |  |  |  |  | } else if (k < n) {                   /* k of n.  FYK shuffle n, pick k */ | 
| 2755 |  |  |  |  |  |  | UV *T; | 
| 2756 | 0 | 0 |  |  |  |  | New(0, T, n, UV); | 
| 2757 | 0 | 0 |  |  |  |  | for (i = 0; i < n; i++) | 
| 2758 | 0 |  |  |  |  |  | T[i] = i; | 
| 2759 | 0 | 0 |  |  |  |  | for (i = 0; i < k && i <= n-2; i++) { | 
|  |  | 0 |  |  |  |  |  | 
| 2760 | 0 |  |  |  |  |  | j = urandomm64(ctx,n-i); | 
| 2761 | 0 |  |  |  |  |  | S[i] = T[i+j]; | 
| 2762 | 0 |  |  |  |  |  | T[i+j] = T[i]; | 
| 2763 |  |  |  |  |  |  | } | 
| 2764 | 0 |  |  |  |  |  | Safefree(T); | 
| 2765 |  |  |  |  |  |  | } else {                              /* n of n.  FYK shuffle. */ | 
| 2766 | 101 | 100 |  |  |  |  | for (i = 0; i < n; i++) | 
| 2767 | 100 |  |  |  |  |  | S[i] = i; | 
| 2768 | 100 | 50 |  |  |  |  | for (i = 0; i < k && i <= n-2; i++) { | 
|  |  | 100 |  |  |  |  |  | 
| 2769 | 99 |  |  |  |  |  | j = urandomm64(ctx,n-i); | 
| 2770 | 99 |  |  |  |  |  | { UV t = S[i]; S[i] = S[i+j]; S[i+j] = t; } | 
| 2771 |  |  |  |  |  |  | } | 
| 2772 |  |  |  |  |  |  | } | 
| 2773 | 3 |  |  |  |  |  | } |