Bitcoin ABC 0.33.3
P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2020 Peter Dettman *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
6
7#ifndef SECP256K1_MODINV32_IMPL_H
8#define SECP256K1_MODINV32_IMPL_H
9
10#include "modinv32.h"
11
12#include "util.h"
13
14#include <stdlib.h>
15
16/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17 * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18 *
19 * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20 * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21 */
22
23#ifdef VERIFY
24static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29 int64_t c = 0;
30 int i;
31 for (i = 0; i < 8; ++i) {
32 if (i < alen) c += (int64_t)a->v[i] * factor;
33 r->v[i] = (int32_t)c & M30; c >>= 30;
34 }
35 if (8 < alen) c += (int64_t)a->v[8] * factor;
36 VERIFY_CHECK(c == (int32_t)c);
37 r->v[8] = (int32_t)c;
38}
39
40/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42 int i;
44 secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45 secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46 for (i = 0; i < 8; ++i) {
47 /* Verify that all but the top limb of a and b are normalized. */
48 VERIFY_CHECK(am.v[i] >> 30 == 0);
49 VERIFY_CHECK(bm.v[i] >> 30 == 0);
50 }
51 for (i = 8; i >= 0; --i) {
52 if (am.v[i] < bm.v[i]) return -1;
53 if (am.v[i] > bm.v[i]) return 1;
54 }
55 return 0;
56}
57#endif
58
59/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60 * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61 * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62 * [0,2^30). */
64 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65 int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66 r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67 volatile int32_t cond_add, cond_negate;
68
69#ifdef VERIFY
70 /* Verify that all limbs are in range (-2^30,2^30). */
71 int i;
72 for (i = 0; i < 9; ++i) {
73 VERIFY_CHECK(r->v[i] >= -M30);
74 VERIFY_CHECK(r->v[i] <= M30);
75 }
76 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78#endif
79
80 /* In a first step, add the modulus if the input is negative, and then negate if requested.
81 * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82 * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83 * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84 * indeed the behavior of the right shift operator). */
85 cond_add = r8 >> 31;
86 r0 += modinfo->modulus.v[0] & cond_add;
87 r1 += modinfo->modulus.v[1] & cond_add;
88 r2 += modinfo->modulus.v[2] & cond_add;
89 r3 += modinfo->modulus.v[3] & cond_add;
90 r4 += modinfo->modulus.v[4] & cond_add;
91 r5 += modinfo->modulus.v[5] & cond_add;
92 r6 += modinfo->modulus.v[6] & cond_add;
93 r7 += modinfo->modulus.v[7] & cond_add;
94 r8 += modinfo->modulus.v[8] & cond_add;
95 cond_negate = sign >> 31;
96 r0 = (r0 ^ cond_negate) - cond_negate;
97 r1 = (r1 ^ cond_negate) - cond_negate;
98 r2 = (r2 ^ cond_negate) - cond_negate;
99 r3 = (r3 ^ cond_negate) - cond_negate;
100 r4 = (r4 ^ cond_negate) - cond_negate;
101 r5 = (r5 ^ cond_negate) - cond_negate;
102 r6 = (r6 ^ cond_negate) - cond_negate;
103 r7 = (r7 ^ cond_negate) - cond_negate;
104 r8 = (r8 ^ cond_negate) - cond_negate;
105 /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106 r1 += r0 >> 30; r0 &= M30;
107 r2 += r1 >> 30; r1 &= M30;
108 r3 += r2 >> 30; r2 &= M30;
109 r4 += r3 >> 30; r3 &= M30;
110 r5 += r4 >> 30; r4 &= M30;
111 r6 += r5 >> 30; r5 &= M30;
112 r7 += r6 >> 30; r6 &= M30;
113 r8 += r7 >> 30; r7 &= M30;
114
115 /* In a second step add the modulus again if the result is still negative, bringing r to range
116 * [0,modulus). */
117 cond_add = r8 >> 31;
118 r0 += modinfo->modulus.v[0] & cond_add;
119 r1 += modinfo->modulus.v[1] & cond_add;
120 r2 += modinfo->modulus.v[2] & cond_add;
121 r3 += modinfo->modulus.v[3] & cond_add;
122 r4 += modinfo->modulus.v[4] & cond_add;
123 r5 += modinfo->modulus.v[5] & cond_add;
124 r6 += modinfo->modulus.v[6] & cond_add;
125 r7 += modinfo->modulus.v[7] & cond_add;
126 r8 += modinfo->modulus.v[8] & cond_add;
127 /* And propagate again. */
128 r1 += r0 >> 30; r0 &= M30;
129 r2 += r1 >> 30; r1 &= M30;
130 r3 += r2 >> 30; r2 &= M30;
131 r4 += r3 >> 30; r3 &= M30;
132 r5 += r4 >> 30; r4 &= M30;
133 r6 += r5 >> 30; r5 &= M30;
134 r7 += r6 >> 30; r6 &= M30;
135 r8 += r7 >> 30; r7 &= M30;
136
137 r->v[0] = r0;
138 r->v[1] = r1;
139 r->v[2] = r2;
140 r->v[3] = r3;
141 r->v[4] = r4;
142 r->v[5] = r5;
143 r->v[6] = r6;
144 r->v[7] = r7;
145 r->v[8] = r8;
146
147#ifdef VERIFY
148 VERIFY_CHECK(r0 >> 30 == 0);
149 VERIFY_CHECK(r1 >> 30 == 0);
150 VERIFY_CHECK(r2 >> 30 == 0);
151 VERIFY_CHECK(r3 >> 30 == 0);
152 VERIFY_CHECK(r4 >> 30 == 0);
153 VERIFY_CHECK(r5 >> 30 == 0);
154 VERIFY_CHECK(r6 >> 30 == 0);
155 VERIFY_CHECK(r7 >> 30 == 0);
156 VERIFY_CHECK(r8 >> 30 == 0);
157 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159#endif
160}
161
162/* Data type for transition matrices (see section 3 of explanation).
163 *
164 * t = [ u v ]
165 * [ q r ]
166 */
167typedef struct {
168 int32_t u, v, q, r;
170
171/* Compute the transition matrix and zeta for 30 divsteps.
172 *
173 * Input: zeta: initial zeta
174 * f0: bottom limb of initial f
175 * g0: bottom limb of initial g
176 * Output: t: transition matrix
177 * Return: final zeta
178 *
179 * Implements the divsteps_n_matrix function from the explanation.
180 */
181static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
182 /* u,v,q,r are the elements of the transformation matrix being built up,
183 * starting with the identity matrix. Semantically they are signed integers
184 * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
185 * permits left shifting (which is UB for negative numbers). The range
186 * being inside [-2^31,2^31) means that casting to signed works correctly.
187 */
188 uint32_t u = 1, v = 0, q = 0, r = 1;
189 volatile uint32_t c1, c2;
190 uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
191 int i;
192
193 for (i = 0; i < 30; ++i) {
194 VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
195 VERIFY_CHECK((u * f0 + v * g0) == f << i);
196 VERIFY_CHECK((q * f0 + r * g0) == g << i);
197 /* Compute conditional masks for (zeta < 0) and for (g & 1). */
198 c1 = zeta >> 31;
199 mask1 = c1;
200 c2 = g & 1;
201 mask2 = -c2;
202 /* Compute x,y,z, conditionally negated versions of f,u,v. */
203 x = (f ^ mask1) - mask1;
204 y = (u ^ mask1) - mask1;
205 z = (v ^ mask1) - mask1;
206 /* Conditionally add x,y,z to g,q,r. */
207 g += x & mask2;
208 q += y & mask2;
209 r += z & mask2;
210 /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
211 mask1 &= mask2;
212 /* Conditionally change zeta into -zeta-2 or zeta-1. */
213 zeta = (zeta ^ mask1) - 1;
214 /* Conditionally add g,q,r to f,u,v. */
215 f += g & mask1;
216 u += q & mask1;
217 v += r & mask1;
218 /* Shifts */
219 g >>= 1;
220 u <<= 1;
221 v <<= 1;
222 /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
223 VERIFY_CHECK(zeta >= -601 && zeta <= 601);
224 }
225 /* Return data in t and return value. */
226 t->u = (int32_t)u;
227 t->v = (int32_t)v;
228 t->q = (int32_t)q;
229 t->r = (int32_t)r;
230 /* The determinant of t must be a power of two. This guarantees that multiplication with t
231 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
232 * will be divided out again). As each divstep's individual matrix has determinant 2, the
233 * aggregate of 30 of them will have determinant 2^30. */
234 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
235 return zeta;
236}
237
238/* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
239static const uint8_t secp256k1_modinv32_inv256[128] = {
240 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
241 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
242 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
243 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
244 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
245 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
246 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
247 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
248 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
249 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
250 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
251};
252
253/* Compute the transition matrix and eta for 30 divsteps (variable time).
254 *
255 * Input: eta: initial eta
256 * f0: bottom limb of initial f
257 * g0: bottom limb of initial g
258 * Output: t: transition matrix
259 * Return: final eta
260 *
261 * Implements the divsteps_n_matrix_var function from the explanation.
262 */
263static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
264 /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
265 uint32_t u = 1, v = 0, q = 0, r = 1;
266 uint32_t f = f0, g = g0, m;
267 uint16_t w;
268 int i = 30, limit, zeros;
269
270 for (;;) {
271 /* Use a sentinel bit to count zeros only up to i. */
272 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
273 /* Perform zeros divsteps at once; they all just divide g by two. */
274 g >>= zeros;
275 u <<= zeros;
276 v <<= zeros;
277 eta -= zeros;
278 i -= zeros;
279 /* We're done once we've done 30 divsteps. */
280 if (i == 0) break;
281 VERIFY_CHECK((f & 1) == 1);
282 VERIFY_CHECK((g & 1) == 1);
283 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
284 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
285 /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
286 VERIFY_CHECK(eta >= -751 && eta <= 751);
287 /* If eta is negative, negate it and replace f,g with g,-f. */
288 if (eta < 0) {
289 uint32_t tmp;
290 eta = -eta;
291 tmp = f; f = g; g = -tmp;
292 tmp = u; u = q; q = -tmp;
293 tmp = v; v = r; r = -tmp;
294 }
295 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
296 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
297 * can be done as its sign will flip once that happens. */
298 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
299 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
300 VERIFY_CHECK(limit > 0 && limit <= 30);
301 m = (UINT32_MAX >> (32 - limit)) & 255U;
302 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
303 w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
304 /* Do so. */
305 g += f * w;
306 q += u * w;
307 r += v * w;
308 VERIFY_CHECK((g & m) == 0);
309 }
310 /* Return data in t and return value. */
311 t->u = (int32_t)u;
312 t->v = (int32_t)v;
313 t->q = (int32_t)q;
314 t->r = (int32_t)r;
315 /* The determinant of t must be a power of two. This guarantees that multiplication with t
316 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
317 * will be divided out again). As each divstep's individual matrix has determinant 2, the
318 * aggregate of 30 of them will have determinant 2^30. */
319 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
320 return eta;
321}
322
323/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
324 * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
325 * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
326 *
327 * Input: eta: initial eta
328 * f0: bottom limb of initial f
329 * g0: bottom limb of initial g
330 * Output: t: transition matrix
331 * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
332 * by applying the returned transformation matrix to it. The other bits of *jacp may
333 * change, but are meaningless.
334 * Return: final eta
335 */
336static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
337 /* Transformation matrix. */
338 uint32_t u = 1, v = 0, q = 0, r = 1;
339 uint32_t f = f0, g = g0, m;
340 uint16_t w;
341 int i = 30, limit, zeros;
342 int jac = *jacp;
343
344 for (;;) {
345 /* Use a sentinel bit to count zeros only up to i. */
346 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
347 /* Perform zeros divsteps at once; they all just divide g by two. */
348 g >>= zeros;
349 u <<= zeros;
350 v <<= zeros;
351 eta -= zeros;
352 i -= zeros;
353 /* Update the bottom bit of jac: when dividing g by an odd power of 2,
354 * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
355 jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
356 /* We're done once we've done 30 posdivsteps. */
357 if (i == 0) break;
358 VERIFY_CHECK((f & 1) == 1);
359 VERIFY_CHECK((g & 1) == 1);
360 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
361 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
362 /* If eta is negative, negate it and replace f,g with g,f. */
363 if (eta < 0) {
364 uint32_t tmp;
365 eta = -eta;
366 /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
367 * if both f and g are 3 mod 4. */
368 jac ^= ((f & g) >> 1);
369 tmp = f; f = g; g = tmp;
370 tmp = u; u = q; q = tmp;
371 tmp = v; v = r; r = tmp;
372 }
373 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
374 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
375 * can be done as its sign will flip once that happens. */
376 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
377 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
378 VERIFY_CHECK(limit > 0 && limit <= 30);
379 m = (UINT32_MAX >> (32 - limit)) & 255U;
380 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
381 w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
382 /* Do so. */
383 g += f * w;
384 q += u * w;
385 r += v * w;
386 VERIFY_CHECK((g & m) == 0);
387 }
388 /* Return data in t and return value. */
389 t->u = (int32_t)u;
390 t->v = (int32_t)v;
391 t->q = (int32_t)q;
392 t->r = (int32_t)r;
393 /* The determinant of t must be a power of two. This guarantees that multiplication with t
394 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
395 * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
396 * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
397 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
398 (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
399 *jacp = jac;
400 return eta;
401}
402
403/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
404 *
405 * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
406 * (-2^30,2^30).
407 *
408 * This implements the update_de function from the explanation.
409 */
411 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
412 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
413 int32_t di, ei, md, me, sd, se;
414 int64_t cd, ce;
415 int i;
416#ifdef VERIFY
417 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
418 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
419 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
420 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
421 VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
422 VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
423#endif
424 /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
425 sd = d->v[8] >> 31;
426 se = e->v[8] >> 31;
427 md = (u & sd) + (v & se);
428 me = (q & sd) + (r & se);
429 /* Begin computing t*[d,e]. */
430 di = d->v[0];
431 ei = e->v[0];
432 cd = (int64_t)u * di + (int64_t)v * ei;
433 ce = (int64_t)q * di + (int64_t)r * ei;
434 /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
435 md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
436 me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
437 /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
438 cd += (int64_t)modinfo->modulus.v[0] * md;
439 ce += (int64_t)modinfo->modulus.v[0] * me;
440 /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
441 VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
442 VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
443 /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
444 * limb i-1 (shifting down by 30 bits). */
445 for (i = 1; i < 9; ++i) {
446 di = d->v[i];
447 ei = e->v[i];
448 cd += (int64_t)u * di + (int64_t)v * ei;
449 ce += (int64_t)q * di + (int64_t)r * ei;
450 cd += (int64_t)modinfo->modulus.v[i] * md;
451 ce += (int64_t)modinfo->modulus.v[i] * me;
452 d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
453 e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
454 }
455 /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
456 d->v[8] = (int32_t)cd;
457 e->v[8] = (int32_t)ce;
458#ifdef VERIFY
459 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
460 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
461 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
462 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
463#endif
464}
465
466/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
467 *
468 * This implements the update_fg function from the explanation.
469 */
471 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
472 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
473 int32_t fi, gi;
474 int64_t cf, cg;
475 int i;
476 /* Start computing t*[f,g]. */
477 fi = f->v[0];
478 gi = g->v[0];
479 cf = (int64_t)u * fi + (int64_t)v * gi;
480 cg = (int64_t)q * fi + (int64_t)r * gi;
481 /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
482 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
483 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
484 /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
485 * down by 30 bits). */
486 for (i = 1; i < 9; ++i) {
487 fi = f->v[i];
488 gi = g->v[i];
489 cf += (int64_t)u * fi + (int64_t)v * gi;
490 cg += (int64_t)q * fi + (int64_t)r * gi;
491 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
492 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
493 }
494 /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
495 f->v[8] = (int32_t)cf;
496 g->v[8] = (int32_t)cg;
497}
498
499/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
500 *
501 * Version that operates on a variable number of limbs in f and g.
502 *
503 * This implements the update_fg function from the explanation in modinv64_impl.h.
504 */
506 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
507 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
508 int32_t fi, gi;
509 int64_t cf, cg;
510 int i;
511 VERIFY_CHECK(len > 0);
512 /* Start computing t*[f,g]. */
513 fi = f->v[0];
514 gi = g->v[0];
515 cf = (int64_t)u * fi + (int64_t)v * gi;
516 cg = (int64_t)q * fi + (int64_t)r * gi;
517 /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
518 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
519 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
520 /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
521 * down by 30 bits). */
522 for (i = 1; i < len; ++i) {
523 fi = f->v[i];
524 gi = g->v[i];
525 cf += (int64_t)u * fi + (int64_t)v * gi;
526 cg += (int64_t)q * fi + (int64_t)r * gi;
527 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
528 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
529 }
530 /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
531 f->v[len - 1] = (int32_t)cf;
532 g->v[len - 1] = (int32_t)cg;
533}
534
535/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
537 /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
542 int i;
543 int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
544
545 /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
546 for (i = 0; i < 20; ++i) {
547 /* Compute transition matrix and new zeta after 30 divsteps. */
549 zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
550 /* Update d,e using that transition matrix. */
551 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
552 /* Update f,g using that transition matrix. */
553#ifdef VERIFY
554 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
555 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
556 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
557 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
558#endif
560#ifdef VERIFY
561 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
562 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
563 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
564 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
565#endif
566 }
567
568 /* At this point sufficient iterations have been performed that g must have reached 0
569 * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
570 * values i.e. +/- 1, and d now contains +/- the modular inverse. */
571#ifdef VERIFY
572 /* g == 0 */
573 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
574 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
575 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
576 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
577 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
578 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
579 (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
580 secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
581#endif
582
583 /* Optionally negate d, normalize to [0,modulus), and return it. */
584 secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
585 *x = d;
586}
587
588/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
590 /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
591 secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
592 secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
595#ifdef VERIFY
596 int i = 0;
597#endif
598 int j, len = 9;
599 int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
600 int32_t cond, fn, gn;
601
602 /* Do iterations of 30 divsteps each until g=0. */
603 while (1) {
604 /* Compute transition matrix and new eta after 30 divsteps. */
606 eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
607 /* Update d,e using that transition matrix. */
608 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
609 /* Update f,g using that transition matrix. */
610#ifdef VERIFY
611 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
612 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
613 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
614 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
615#endif
617 /* If the bottom limb of g is 0, there is a chance g=0. */
618 if (g.v[0] == 0) {
619 cond = 0;
620 /* Check if all other limbs are also 0. */
621 for (j = 1; j < len; ++j) {
622 cond |= g.v[j];
623 }
624 /* If so, we're done. */
625 if (cond == 0) break;
626 }
627
628 /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
629 fn = f.v[len - 1];
630 gn = g.v[len - 1];
631 cond = ((int32_t)len - 2) >> 31;
632 cond |= fn ^ (fn >> 31);
633 cond |= gn ^ (gn >> 31);
634 /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
635 if (cond == 0) {
636 f.v[len - 2] |= (uint32_t)fn << 30;
637 g.v[len - 2] |= (uint32_t)gn << 30;
638 --len;
639 }
640#ifdef VERIFY
641 VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
642 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
643 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
644 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
645 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
646#endif
647 }
648
649 /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
650 * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
651#ifdef VERIFY
652 /* g == 0 */
653 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
654 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
655 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
656 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
657 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
658 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
659 (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
660 secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
661#endif
662
663 /* Optionally negate d, normalize to [0,modulus), and return it. */
664 secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
665 *x = d;
666}
667
668/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
669 * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
670#ifdef VERIFY
671#define JACOBI32_ITERATIONS 25
672#else
673#define JACOBI32_ITERATIONS 50
674#endif
675
676/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
678 /* Start with f=modulus, g=x, eta=-1. */
681 int j, len = 9;
682 int32_t eta = -1; /* eta = -delta; delta is initially 1 */
683 int32_t cond, fn, gn;
684 int jac = 0;
685 int count;
686
687 /* The input limbs must all be non-negative. */
688 VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
689
690 /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
691 * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
692 * time out). */
693 VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
694
695 for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
696 /* Compute transition matrix and new eta after 30 posdivsteps. */
698 eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
699 /* Update f,g using that transition matrix. */
700#ifdef VERIFY
701 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
702 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
703 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
704 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
705#endif
707 /* If the bottom limb of f is 1, there is a chance that f=1. */
708 if (f.v[0] == 1) {
709 cond = 0;
710 /* Check if the other limbs are also 0. */
711 for (j = 1; j < len; ++j) {
712 cond |= f.v[j];
713 }
714 /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
715 if (cond == 0) return 1 - 2*(jac & 1);
716 }
717
718 /* Determine if len>1 and limb (len-1) of both f and g is 0. */
719 fn = f.v[len - 1];
720 gn = g.v[len - 1];
721 cond = ((int32_t)len - 2) >> 31;
722 cond |= fn;
723 cond |= gn;
724 /* If so, reduce length. */
725 if (cond == 0) --len;
726#ifdef VERIFY
727 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
728 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
729 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
730 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
731#endif
732 }
733
734 /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
735 return 0;
736}
737
738#endif /* SECP256K1_MODINV32_IMPL_H */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
#define JACOBI32_ITERATIONS
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static const uint8_t secp256k1_modinv32_inv256[128]
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:305
#define VERIFY_CHECK(cond)
Definition: util.h:130
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:21
static int count