line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#ifndef MPU_MULMOD_H |
2
|
|
|
|
|
|
|
#define MPU_MULMOD_H |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
#include "ptypes.h" |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
/* if n is smaller than this, you can multiply without overflow */ |
7
|
|
|
|
|
|
|
#define HALF_WORD (UVCONST(1) << (BITS_PER_WORD/2)) |
8
|
|
|
|
|
|
|
/* This will be true if we think mulmods are fast */ |
9
|
|
|
|
|
|
|
#define MULMODS_ARE_FAST 1 |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#if (BITS_PER_WORD == 32) && HAVE_STD_U64 |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
/* We have 64-bit available, but UV is 32-bit. Do the math in 64-bit. |
14
|
|
|
|
|
|
|
* Even if it is emulated, it should be as fast or faster than us doing it. |
15
|
|
|
|
|
|
|
*/ |
16
|
|
|
|
|
|
|
#define addmod(a,b,n) (UV)( ((uint64_t)(a) + (b)) % (n) ) |
17
|
|
|
|
|
|
|
#define mulmod(a,b,n) (UV)( ((uint64_t)(a) * (b)) % (n) ) |
18
|
|
|
|
|
|
|
#define sqrmod(a,n) (UV)( ((uint64_t)(a) * (a)) % (n) ) |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#elif defined(__GNUC__) && defined(__x86_64__) |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
/* GCC on a 64-bit Intel x86, help from WraithX and Wojciech Izykowski */ |
23
|
|
|
|
|
|
|
/* Beware: if (a*b)/c > 2^64, there will be an FP exception */ |
24
|
0
|
|
|
|
|
|
static INLINE UV _mulmod(UV a, UV b, UV n) { |
25
|
|
|
|
|
|
|
UV d, dummy; /* d will get a*b mod c */ |
26
|
0
|
|
|
|
|
|
asm ("mulq %3\n\t" /* mul a*b -> rdx:rax */ |
27
|
|
|
|
|
|
|
"divq %4\n\t" /* (a*b)/c -> quot in rax remainder in rdx */ |
28
|
|
|
|
|
|
|
:"=a"(dummy), "=&d"(d) /* output */ |
29
|
|
|
|
|
|
|
:"a"(a), "r"(b), "r"(n) /* input */ |
30
|
|
|
|
|
|
|
:"cc" /* mulq and divq can set conditions */ |
31
|
|
|
|
|
|
|
); |
32
|
0
|
|
|
|
|
|
return d; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
#define mulmod(a,b,n) _mulmod(a,b,n) |
35
|
|
|
|
|
|
|
#define sqrmod(a,n) _mulmod(a,a,n) |
36
|
|
|
|
|
|
|
/* A version for _MSC_VER: |
37
|
|
|
|
|
|
|
* __asm { mov rax, qword ptr a |
38
|
|
|
|
|
|
|
* mul qword ptr b |
39
|
|
|
|
|
|
|
* div qword ptr c |
40
|
|
|
|
|
|
|
* mov qword ptr d, rdx } */ |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
/* addmod from Kruppa 2010 page 67 */ |
43
|
0
|
|
|
|
|
|
static INLINE UV _addmod(UV a, UV b, UV n) { |
44
|
0
|
|
|
|
|
|
UV t = a-n; |
45
|
0
|
|
|
|
|
|
a += b; |
46
|
0
|
|
|
|
|
|
asm ("add %2, %1\n\t" /* t := t + b */ |
47
|
|
|
|
|
|
|
"cmovc %1, %0\n\t" /* if (carry) a := t */ |
48
|
|
|
|
|
|
|
:"+r" (a), "+&r" (t) |
49
|
|
|
|
|
|
|
:"r" (b) |
50
|
|
|
|
|
|
|
:"cc" |
51
|
|
|
|
|
|
|
); |
52
|
0
|
|
|
|
|
|
return a; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
#define addmod(a,b,n) _addmod(a,b,n) |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
#elif BITS_PER_WORD == 64 && HAVE_UINT128 |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
/* We're 64-bit, using a modern gcc, and the target has some 128-bit type. |
59
|
|
|
|
|
|
|
* The actual number of targets that have this implemented are limited. */ |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
#define mulmod(a,b,n) (UV)( ((uint128_t)(a) * (b)) % (n) ) |
62
|
|
|
|
|
|
|
#define sqrmod(a,n) (UV)( ((uint128_t)(a) * (a)) % (n) ) |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
#else |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
/* UV is the largest integral type available (that we know of). */ |
67
|
|
|
|
|
|
|
#undef MULMODS_ARE_FAST |
68
|
|
|
|
|
|
|
#define MULMODS_ARE_FAST 0 |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
/* Do it by hand */ |
71
|
|
|
|
|
|
|
static INLINE UV _mulmod(UV a, UV b, UV n) { |
72
|
|
|
|
|
|
|
UV r = 0; |
73
|
|
|
|
|
|
|
if (a >= n) a %= n; /* Careful attention from the caller should make */ |
74
|
|
|
|
|
|
|
if (b >= n) b %= n; /* these unnecessary. */ |
75
|
|
|
|
|
|
|
if ((a|b) < HALF_WORD) return (a*b) % n; |
76
|
|
|
|
|
|
|
if (a < b) { UV t = a; a = b; b = t; } |
77
|
|
|
|
|
|
|
if (n <= (UV_MAX>>1)) { |
78
|
|
|
|
|
|
|
while (b > 0) { |
79
|
|
|
|
|
|
|
if (b & 1) { r += a; if (r >= n) r -= n; } |
80
|
|
|
|
|
|
|
b >>= 1; |
81
|
|
|
|
|
|
|
if (b) { a += a; if (a >= n) a -= n; } |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} else { |
84
|
|
|
|
|
|
|
while (b > 0) { |
85
|
|
|
|
|
|
|
if (b & 1) r = ((n-r) > a) ? r+a : r+a-n; /* r = (r + a) % n */ |
86
|
|
|
|
|
|
|
b >>= 1; |
87
|
|
|
|
|
|
|
if (b) a = ((n-a) > a) ? a+a : a+a-n; /* a = (a + a) % n */ |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
return r; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#define mulmod(a,b,n) _mulmod(a,b,n) |
94
|
|
|
|
|
|
|
#define sqrmod(a,n) _mulmod(a,a,n) |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
#endif |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
#ifndef addmod |
99
|
|
|
|
|
|
|
static INLINE UV addmod(UV a, UV b, UV n) { |
100
|
|
|
|
|
|
|
return ((n-a) > b) ? a+b : a+b-n; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
#endif |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
static INLINE UV submod(UV a, UV b, UV n) { |
105
|
|
|
|
|
|
|
UV t = n-b; /* Evaluate as UV, then hand to addmod */ |
106
|
|
|
|
|
|
|
return addmod(a, t, n); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
/* a^2 + c mod n */ |
110
|
|
|
|
|
|
|
#define sqraddmod(a, c, n) addmod(sqrmod(a,n), c, n) |
111
|
|
|
|
|
|
|
/* a*b + c mod n */ |
112
|
|
|
|
|
|
|
#define muladdmod(a, b, c, n) addmod(mulmod(a,b,n), c, n) |
113
|
|
|
|
|
|
|
/* a*b - c mod n */ |
114
|
|
|
|
|
|
|
#define mulsubmod(a, b, c, n) submod(mulmod(a,b,n), c, n) |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* a^k mod n */ |
117
|
|
|
|
|
|
|
#ifndef HALF_WORD |
118
|
|
|
|
|
|
|
static INLINE UV powmod(UV a, UV k, UV n) { |
119
|
|
|
|
|
|
|
UV t = 1; |
120
|
|
|
|
|
|
|
if (a >= n) a %= n; |
121
|
|
|
|
|
|
|
while (k) { |
122
|
|
|
|
|
|
|
if (k & 1) t = mulmod(t, a, n); |
123
|
|
|
|
|
|
|
k >>= 1; |
124
|
|
|
|
|
|
|
if (k) a = sqrmod(a, n); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
return t; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
#else |
129
|
0
|
|
|
|
|
|
static INLINE UV powmod(UV a, UV k, UV n) { |
130
|
0
|
|
|
|
|
|
UV t = 1; |
131
|
0
|
0
|
|
|
|
|
if (a >= n) a %= n; |
132
|
0
|
0
|
|
|
|
|
if (n < HALF_WORD) { |
133
|
0
|
0
|
|
|
|
|
while (k) { |
134
|
0
|
0
|
|
|
|
|
if (k & 1) t = (t*a)%n; |
135
|
0
|
|
|
|
|
|
k >>= 1; |
136
|
0
|
0
|
|
|
|
|
if (k) a = (a*a)%n; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} else { |
139
|
0
|
0
|
|
|
|
|
while (k) { |
140
|
0
|
0
|
|
|
|
|
if (k & 1) t = mulmod(t, a, n); |
141
|
0
|
|
|
|
|
|
k >>= 1; |
142
|
0
|
0
|
|
|
|
|
if (k) a = sqrmod(a, n); |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
0
|
|
|
|
|
|
return t; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
#endif |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
/* a^k + c mod n */ |
150
|
|
|
|
|
|
|
#define powaddmod(a, k, c, n) addmod(powmod(a,k,n),c,n) |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
#endif |