diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 2d81006c00..010d8f279d 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -181,6 +181,31 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
     return 2 * (mask == 0) - 1;
 }
 
+static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    uint128_t t = 1;
+    t += ~a->d[0];
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    return t;
+}
+
+static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint128_t t = (uint128_t)a->d[0] + b->d[0];
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    return t;
+}
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
@@ -929,6 +954,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    int i;
+    for (i = 3; i >= 0; i--) {
+        if (a->d[i] > b->d[i]) {
+            return 1;
+        }
+        if (a->d[i] < b->d[i]) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
     uint64_t l[8];
     unsigned int shiftlimbs;
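The two helpers above work in plain arithmetic modulo 2^256 rather than modulo the group order: secp256k1_scalar_complement computes the two's complement 2^256 - a (that is, ~a plus 1 with carry propagation), and secp256k1_scalar_binadd adds two scalars with carry-out and no overflow reduction, so binadd(b, b, complement(a)) subtracts a from b. A minimal standalone sketch of the same carry chain on two 64-bit limbs (demo code, not part of the patch; assumes a compiler with unsigned __int128):

    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned __int128 uint128_t;

    int main(void) {
        uint64_t a[2] = { 5, 0 };  /* a = 5 */
        uint64_t b[2] = { 3, 1 };  /* b = 2^64 + 3 */
        uint64_t c[2], r[2];
        uint128_t t;

        /* complement: c = 2^128 - a, i.e. ~a + 1 with carries */
        t = 1;
        t += ~a[0]; c[0] = (uint64_t)t; t >>= 64;
        t += ~a[1]; c[1] = (uint64_t)t;

        /* binadd: r = (b + c) mod 2^128, which equals b - a */
        t = (uint128_t)b[0] + c[0]; r[0] = (uint64_t)t; t >>= 64;
        t += (uint128_t)b[1] + c[1]; r[1] = (uint64_t)t;

        /* expect b - a = 2^64 - 2: high limb 0, low limb 0xFFFFFFFFFFFFFFFE */
        printf("%016llx %016llx\n", (unsigned long long)r[1], (unsigned long long)r[0]);
        return 0;
    }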
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index f5042891f3..5efe837f91 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -259,6 +259,46 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
     return 2 * (mask == 0) - 1;
 }
 
+static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    uint64_t t = 1;
+    t += ~a->d[0];
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
+    return t;
+}
+
+static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint64_t t = (uint64_t)a->d[0] + b->d[0];
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[4] + b->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[5] + b->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[6] + b->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[7] + b->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
+    return t;
+}
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
@@ -697,6 +737,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    int i;
+    for (i = 7; i >= 0; i--) {
+        if (a->d[i] > b->d[i]) {
+            return 1;
+        }
+        if (a->d[i] < b->d[i]) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
     uint32_t l[16];
     unsigned int shiftlimbs;
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index c9b38f3c7c..91821b5279 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -27,6 +27,8 @@
 static const secp256k1_scalar secp256k1_scalar_one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
 static const secp256k1_scalar secp256k1_scalar_zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
 
+static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b);
+
 #ifndef USE_NUM_NONE
 static void secp256k1_scalar_get_num(secp256k1_num *r, const secp256k1_scalar *a) {
     unsigned char c[32];
@@ -65,7 +67,6 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
     /* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
      * have a composite group order; fix it in exhaustive_tests.c). */
     VERIFY_CHECK(*r != 0);
-}
 #else
     secp256k1_scalar *t;
     int i;
@@ -218,16 +219,183 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(r, t, &x6); /* 111111 */
+#endif
 }
 
-SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
-    return !(a->d[0] & 1);
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
+    static const secp256k1_scalar lookup[16] = {
+        SECP256K1_SCALAR_CONST(
+            0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+            0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL),
+        SECP256K1_SCALAR_CONST(
+            0xEFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xCF03EF18UL, 0x44541638UL, 0x03D538A4UL, 0x0332DD2DUL),
+        SECP256K1_SCALAR_CONST(
+            0xDFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xE3590149UL, 0xD95F8C34UL, 0x47D812BBUL, 0x362F7919UL),
+        SECP256K1_SCALAR_CONST(
+            0xCFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xF7AE137BUL, 0x6E6B0230UL, 0x8BDAECD2UL, 0x692C1505UL),
+        SECP256K1_SCALAR_CONST(
+            0xBFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x0C0325ADUL, 0x0376782CUL, 0xCFDDC6E9UL, 0x9C28B0F1UL),
+        SECP256K1_SCALAR_CONST(
+            0xAFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x205837DEUL, 0x9881EE29UL, 0x13E0A100UL, 0xCF254CDDUL),
+        SECP256K1_SCALAR_CONST(
+            0x9FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x34AD4A10UL, 0x2D8D6425UL, 0x57E37B18UL, 0x0221E8C9UL),
+        SECP256K1_SCALAR_CONST(
+            0x8FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x49025C41UL, 0xC298DA21UL, 0x9BE6552FUL, 0x351E84B5UL),
+        SECP256K1_SCALAR_CONST(
+            0x7FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x5D576E73UL, 0x57A4501DUL, 0xDFE92F46UL, 0x681B20A1UL),
+        SECP256K1_SCALAR_CONST(
+            0x6FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x71AC80A4UL, 0xECAFC61AUL, 0x23EC095DUL, 0x9B17BC8DUL),
+        SECP256K1_SCALAR_CONST(
+            0x5FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x860192D6UL, 0x81BB3C16UL, 0x67EEE374UL, 0xCE145879UL),
+        SECP256K1_SCALAR_CONST(
+            0x4FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x9A56A508UL, 0x16C6B212UL, 0xABF1BD8CUL, 0x0110F465UL),
+        SECP256K1_SCALAR_CONST(
+            0x3FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xAEABB739UL, 0xABD2280EUL, 0xEFF497A3UL, 0x340D9051UL),
+        SECP256K1_SCALAR_CONST(
+            0x2FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xC300C96BUL, 0x40DD9E0BUL, 0x33F771BAUL, 0x670A2C3DUL),
+        SECP256K1_SCALAR_CONST(
+            0x1FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xD755DB9CUL, 0xD5E91407UL, 0x77FA4BD1UL, 0x9A06C829UL),
+        SECP256K1_SCALAR_CONST(
+            0x0FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xEBAAEDCEUL, 0x6AF48A03UL, 0xBBFD25E8UL, 0xCD036415UL),
+    };
+
+    int data;
+    int extra_bits = k % 4;
+
+    *r = *a;
+    if (extra_bits) {
+        k -= extra_bits;
+        data = secp256k1_scalar_shr_int(r, extra_bits);
+        secp256k1_scalar_add(r, r, &lookup[data << (4 - extra_bits)]);
+    }
+
+    while (k > 0) {
+        k -= 4;
+        data = secp256k1_scalar_shr_int(r, 4);
+        secp256k1_scalar_add(r, r, &lookup[data]);
+    }
+
+    VERIFY_CHECK(k == 0);
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_shr_zeros(secp256k1_scalar *r) {
+    int n, k = 0;
+
+    /* Ensure that we do not have more than 15 trailing zeros. */
+    while ((n = __builtin_ctz(r->d[0] | (1 << 15)))) {
+        k += n;
+        secp256k1_scalar_shr_int(r, n);
+    }
+
+    return k;
+}
+
+static int secp256k1_scalar_eea_inverse(secp256k1_scalar *r, const secp256k1_scalar *n) {
+    secp256k1_scalar u, v, i, j, acomp, negx;
+    secp256k1_scalar *a, *b, *x0, *x1, *tmp;
+    int ka, kb;
+
+    /* Zero is not invertible. */
+    if (secp256k1_scalar_is_zero(n)) {
+        secp256k1_scalar_set_int(r, 0);
+        return 0;
+    }
+
+    /**
+     * The extended Euclidean algorithm computes x, y and gcd(a, b) such that
+     *     a*x + b*y = gcd(a, b)
+     * If we use this algorithm with b = p, then we solve a*x + p*y = gcd(a, p)
+     * We note that:
+     * - The order p is prime, so gcd(a, p) = 1.
+     * - We compute modulo p, and y*p = 0 mod p.
+     * So the equation simplifies to a*x = 1, and x = a^-1.
+     */
+
+    /* a = n */
+    u = *n;
+    a = &u;
+
+    /* Because 2 is not a common factor between a and b, we can detect
+     * multiples of 2 using the LSB and eliminate them aggressively. */
+    ka = secp256k1_scalar_shr_zeros(a);
+
+    /* b = p - a */
+    secp256k1_scalar_negate(&v, a);
+    b = &v;
+
+    /* x0 = 1 */
+    secp256k1_scalar_set_int(&i, 1);
+    secp256k1_scalar_negate(&negx, &i);
+    x0 = &i;
+
+    /* x1 = 0 */
+    secp256k1_scalar_set_int(&j, 0);
+    x1 = &j;
+
+    if (secp256k1_scalar_is_one(a)) {
+        goto done;
+    }
+
+    /* For a and b, we use two's complement math and ensure no overflow happens. */
+    secp256k1_scalar_complement(&acomp, a);
+    goto bzero;
+
+    while (!secp256k1_scalar_is_one(a)) {
+        secp256k1_scalar_complement(&acomp, a);
+        secp256k1_scalar_negate(&negx, x0);
+
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(b, a) > 0);
+        do {
+            secp256k1_scalar_binadd(b, b, &acomp);
+
+bzero:
+            /* We ensure that a and b are odd, so b must be even after subtracting a. */
+            VERIFY_CHECK(secp256k1_scalar_is_even(b));
+            kb = secp256k1_scalar_shr_zeros(b);
+            secp256k1_scalar_add(x1, x1, &negx);
+            secp256k1_scalar_pow2_div(x1, x1, kb);
+        } while (secp256k1_scalar_cmp_var(b, a) > 0);
+
+        /* a and b can never be equal, so if we exited, it is because a > b. */
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(a, b) > 0);
+
+        /* In order to speed things up, we only swap pointers. */
+        tmp = a;
+        a = b;
+        b = tmp;
+
+        tmp = x0;
+        x0 = x1;
+        x1 = tmp;
+    }
+
+done:
+    secp256k1_scalar_pow2_div(r, x0, ka);
+    return 1;
 }
 #endif
 
 static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(USE_SCALAR_INV_BUILTIN)
+#if defined(EXHAUSTIVE_TEST_ORDER)
     secp256k1_scalar_inverse(r, x);
+#elif defined(USE_SCALAR_INV_BUILTIN)
+    secp256k1_scalar_eea_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
     secp256k1_num n, m;
@@ -246,6 +414,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
 #endif
 }
 
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+    return !(a->d[0] & 1);
+}
+#endif
+
 #ifdef USE_ENDOMORPHISM
 #if defined(EXHAUSTIVE_TEST_ORDER)
 /**
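The loop in secp256k1_scalar_eea_inverse is easiest to follow through its invariants: writing n' for the input with its ka trailing zero bits stripped, each pass keeps a = x0*n' and b = x1*n' (mod the group order). The goto bzero entry establishes the second invariant by folding the initial b = p - a into x1 = -x0, and once a reaches 1, x0 holds the inverse of n'; the final pow2_div by ka then corrects for the stripped bits. Below is a compact standalone sketch of the same subtract-and-halve idea over a small prime (hypothetical demo, not the patch's code; div2k and the modulus 1000003 are illustrative stand-ins):

    #include <stdint.h>
    #include <stdio.h>

    /* Divide x by 2^k mod the odd prime p: adding p to an odd x makes it
     * even without changing its residue (pow2_div plays this role above). */
    static uint64_t div2k(uint64_t x, int k, uint64_t p) {
        while (k--) {
            if (x & 1) x += p;
            x >>= 1;
        }
        return x;
    }

    int main(void) {
        const uint64_t p = 1000003; /* small odd prime, stand-in for the group order */
        uint64_t n = 123456;        /* value to invert; requires gcd(n, p) = 1 */
        uint64_t a = n, b = p, x0 = 1, x1 = 0;

        /* Invariants: a == x0*n (mod p) and b == x1*n (mod p). */
        while (a != 1 && b != 1) {
            while (!(a & 1)) { a >>= 1; x0 = div2k(x0, 1, p); }
            while (!(b & 1)) { b >>= 1; x1 = div2k(x1, 1, p); }
            if (a >= b) { a -= b; x0 = (x0 + p - x1) % p; }
            else        { b -= a; x1 = (x1 + p - x0) % p; }
        }
        printf("inverse = %llu\n", (unsigned long long)(a == 1 ? x0 : x1));
        /* sanity check: n * inverse mod p should print 1 */
        printf("check   = %llu\n", (unsigned long long)((n * (a == 1 ? x0 : x1)) % p));
        return 0;
    }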
diff --git a/src/scalar_low_impl.h b/src/scalar_low_impl.h
index ad81f378bf..c126d131ef 100644
--- a/src/scalar_low_impl.h
+++ b/src/scalar_low_impl.h
@@ -114,6 +114,10 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return *a == *b;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    return (*a > *b) - (*a < *b);
+}
+
 static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
     uint32_t mask0, mask1;
     mask0 = flag + ~((uint32_t)0);
diff --git a/src/tests.c b/src/tests.c
index 7edfa8a4fd..bc0b73098b 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1075,6 +1075,37 @@ void scalar_test(void) {
         CHECK(secp256k1_scalar_eq(&r1, &v0));
     }
 
+    {
+        /* Test division by powers of two */
+        int i;
+        secp256k1_scalar r, e, twoinv;
+
+        secp256k1_scalar_set_int(&twoinv, 2);
+        secp256k1_scalar_inverse_var(&twoinv, &twoinv);
+
+        e = s;
+        for (i = 0; i < 25; i++) {
+            secp256k1_scalar_pow2_div(&r, &s, i);
+            CHECK(secp256k1_scalar_eq(&r, &e));
+            secp256k1_scalar_mul(&e, &e, &twoinv);
+        }
+    }
+
+    {
+        /* Test cmp_var */
+        secp256k1_scalar negs;
+        int expected = 0;
+
+        secp256k1_scalar_negate(&negs, &s);
+        if (!secp256k1_scalar_is_zero(&s)) {
+            expected = secp256k1_scalar_is_high(&s) * 2 - 1;
+        }
+
+        CHECK(secp256k1_scalar_cmp_var(&s, &s) == 0);
+        CHECK(secp256k1_scalar_cmp_var(&negs, &negs) == 0);
+        CHECK(secp256k1_scalar_cmp_var(&s, &negs) == expected);
+        CHECK(secp256k1_scalar_cmp_var(&negs, &s) == -expected);
+    }
 }
 
 void run_scalar_tests(void) {
@@ -1119,6 +1150,42 @@ void run_scalar_tests(void) {
     CHECK(secp256k1_scalar_check_overflow(&overflowed));
     }
 
+    {
+        /* Test boundary conditions for division by powers of two */
+        int j;
+        secp256k1_scalar r, x, e, twoinv, powtwoinv, negone;
+
+        secp256k1_scalar_set_int(&negone, 1);
+        secp256k1_scalar_negate(&negone, &negone);
+
+        secp256k1_scalar_set_int(&twoinv, 2);
+        secp256k1_scalar_inverse_var(&twoinv, &twoinv);
+
+        secp256k1_scalar_set_int(&powtwoinv, 1);
+
+        for (i = 0; i < 4; i++) {
+            secp256k1_scalar_mul(&powtwoinv, &powtwoinv, &twoinv);
+
+            for (j = 0; j < (1 << i); j++) {
+                x = negone;
+                /* clear the lowest i bits */
+                x.d[0] ^= x.d[0] & ((1 << i) - 1);
+                /* set j in the lowest bits */
+                x.d[0] |= j;
+
+                /* Save it for later */
+                e = x;
+
+                /* Perform the division and check results. */
+                secp256k1_scalar_pow2_div(&r, &x, i + 1);
+                CHECK(!secp256k1_scalar_check_overflow(&r));
+
+                secp256k1_scalar_mul(&e, &e, &powtwoinv);
+                CHECK(secp256k1_scalar_eq(&r, &e));
+            }
+        }
+    }
+
     {
         /* Static test vectors.
          * These were reduced from ~10^12 random vectors based on comparison-decision
@@ -1132,9 +1199,7 @@ void run_scalar_tests(void) {
         secp256k1_scalar one;
         secp256k1_scalar r1;
         secp256k1_scalar r2;
-#if defined(USE_SCALAR_INV_NUM)
         secp256k1_scalar zzv;
-#endif
         int overflow;
         unsigned char chal[33][2][32] = {
             {{0xff, 0xff, 0x03, 0x07, 0x00, 0x00, 0x00, 0x00,
@@ -1684,10 +1749,8 @@ void run_scalar_tests(void) {
             if (!secp256k1_scalar_is_zero(&y)) {
                 secp256k1_scalar_inverse(&zz, &y);
                 CHECK(!secp256k1_scalar_check_overflow(&zz));
-#if defined(USE_SCALAR_INV_NUM)
                 secp256k1_scalar_inverse_var(&zzv, &y);
                 CHECK(secp256k1_scalar_eq(&zzv, &zz));
-#endif
                 secp256k1_scalar_mul(&z, &z, &zz);
                 CHECK(!secp256k1_scalar_check_overflow(&z));
                 CHECK(secp256k1_scalar_eq(&x, &z));
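For reference, the 16 constants in secp256k1_scalar_pow2_div's table are i * 16^-1 mod the group order for i = 0..15: secp256k1_scalar_shr_int shifts out data = r mod 16 and returns it, and adding lookup[data] restores the exact division by 16. A sketch of how one might regenerate or audit those constants using the library's internal scalar API (hypothetical helper, not part of the patch; it needs <stdio.h> and must be compiled where the internal scalar functions are visible):

    /* Print the 16 table entries, entry i = i * 16^-1 mod the group order. */
    static void print_pow2_div_table(void) {
        secp256k1_scalar inv16, entry;
        unsigned char b[32];
        int i, j;

        secp256k1_scalar_set_int(&inv16, 16);
        secp256k1_scalar_inverse_var(&inv16, &inv16);

        for (i = 0; i < 16; i++) {
            secp256k1_scalar_set_int(&entry, i);
            secp256k1_scalar_mul(&entry, &entry, &inv16);
            secp256k1_scalar_get_b32(b, &entry);
            for (j = 0; j < 32; j++) {
                printf("%02X", b[j]);
            }
            printf("\n");
        }
    }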