diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 2d81006c00..010d8f279d 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -181,6 +181,31 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
     return 2 * (mask == 0) - 1;
 }
 
+static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    uint128_t t = 1;
+    t += ~a->d[0];
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += ~a->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    return t;
+}
+
+static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint128_t t = (uint128_t)a->d[0] + b->d[0];
+    r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    t += (uint128_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
+    return t;
+}
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
@@ -929,6 +954,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    int i;
+    for (i = 3; i >= 0; i--) {
+        if (a->d[i] > b->d[i]) {
+            return 1;
+        }
+        if (a->d[i] < b->d[i]) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
     uint64_t l[8];
     unsigned int shiftlimbs;
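The two helpers above work in plain arithmetic modulo 2^256 rather than modulo the group order: secp256k1_scalar_complement computes the two's complement 2^256 - a (that is, ~a plus 1 with carry propagation), and secp256k1_scalar_binadd adds two scalars with carry-out and no overflow reduction, so binadd(b, b, complement(a)) subtracts a from b. A minimal standalone sketch of the same carry chain on two 64-bit limbs (demo code, not part of the patch; assumes a compiler with unsigned __int128):

    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned __int128 uint128_t;

    int main(void) {
        uint64_t a[2] = { 5, 0 };  /* a = 5 */
        uint64_t b[2] = { 3, 1 };  /* b = 2^64 + 3 */
        uint64_t c[2], r[2];
        uint128_t t;

        /* complement: c = 2^128 - a, i.e. ~a + 1 with carries */
        t = 1;
        t += ~a[0]; c[0] = (uint64_t)t; t >>= 64;
        t += ~a[1]; c[1] = (uint64_t)t;

        /* binadd: r = (b + c) mod 2^128, which equals b - a */
        t = (uint128_t)b[0] + c[0]; r[0] = (uint64_t)t; t >>= 64;
        t += (uint128_t)b[1] + c[1]; r[1] = (uint64_t)t;

        /* expect b - a = 2^64 - 2: high limb 0, low limb 0xFFFFFFFFFFFFFFFE */
        printf("%016llx %016llx\n", (unsigned long long)r[1], (unsigned long long)r[0]);
        return 0;
    }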
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index f5042891f3..5efe837f91 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -259,6 +259,46 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) {
     return 2 * (mask == 0) - 1;
 }
 
+static int secp256k1_scalar_complement(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    uint64_t t = 1;
+    t += ~a->d[0];
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += ~a->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
+    return t;
+}
+
+static int secp256k1_scalar_binadd(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    uint64_t t = (uint64_t)a->d[0] + b->d[0];
+    r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[1] + b->d[1];
+    r->d[1] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[2] + b->d[2];
+    r->d[2] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[3] + b->d[3];
+    r->d[3] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[4] + b->d[4];
+    r->d[4] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[5] + b->d[5];
+    r->d[5] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[6] + b->d[6];
+    r->d[6] = t & 0xFFFFFFFFULL; t >>= 32;
+    t += (uint64_t)a->d[7] + b->d[7];
+    r->d[7] = t & 0xFFFFFFFFULL; t >>= 32;
+    return t;
+}
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
@@ -697,6 +737,19 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3]) | (a->d[4] ^ b->d[4]) | (a->d[5] ^ b->d[5]) | (a->d[6] ^ b->d[6]) | (a->d[7] ^ b->d[7])) == 0;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    int i;
+    for (i = 7; i >= 0; i--) {
+        if (a->d[i] > b->d[i]) {
+            return 1;
+        }
+        if (a->d[i] < b->d[i]) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
 SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift) {
     uint32_t l[16];
     unsigned int shiftlimbs;
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index c9b38f3c7c..91821b5279 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -27,6 +27,8 @@
 static const secp256k1_scalar secp256k1_scalar_one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
 static const secp256k1_scalar secp256k1_scalar_zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
 
+static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b);
+
 #ifndef USE_NUM_NONE
 static void secp256k1_scalar_get_num(secp256k1_num *r, const secp256k1_scalar *a) {
     unsigned char c[32];
@@ -65,7 +67,6 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
     /* If this VERIFY_CHECK triggers we were given a noninvertible scalar (and thus
      * have a composite group order; fix it in exhaustive_tests.c). */
     VERIFY_CHECK(*r != 0);
-}
 #else
     secp256k1_scalar *t;
     int i;
@@ -218,16 +219,183 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
         secp256k1_scalar_sqr(t, t);
     }
     secp256k1_scalar_mul(r, t, &x6); /* 111111 */
+#endif
 }
 
-SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
-    return !(a->d[0] & 1);
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
+    static const secp256k1_scalar lookup[16] = {
+        SECP256K1_SCALAR_CONST(
+            0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+            0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL),
+        SECP256K1_SCALAR_CONST(
+            0xEFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xCF03EF18UL, 0x44541638UL, 0x03D538A4UL, 0x0332DD2DUL),
+        SECP256K1_SCALAR_CONST(
+            0xDFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xE3590149UL, 0xD95F8C34UL, 0x47D812BBUL, 0x362F7919UL),
+        SECP256K1_SCALAR_CONST(
+            0xCFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
+            0xF7AE137BUL, 0x6E6B0230UL, 0x8BDAECD2UL, 0x692C1505UL),
+        SECP256K1_SCALAR_CONST(
+            0xBFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x0C0325ADUL, 0x0376782CUL, 0xCFDDC6E9UL, 0x9C28B0F1UL),
+        SECP256K1_SCALAR_CONST(
+            0xAFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x205837DEUL, 0x9881EE29UL, 0x13E0A100UL, 0xCF254CDDUL),
+        SECP256K1_SCALAR_CONST(
+            0x9FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x34AD4A10UL, 0x2D8D6425UL, 0x57E37B18UL, 0x0221E8C9UL),
+        SECP256K1_SCALAR_CONST(
+            0x8FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x49025C41UL, 0xC298DA21UL, 0x9BE6552FUL, 0x351E84B5UL),
+        SECP256K1_SCALAR_CONST(
+            0x7FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x5D576E73UL, 0x57A4501DUL, 0xDFE92F46UL, 0x681B20A1UL),
+        SECP256K1_SCALAR_CONST(
+            0x6FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x71AC80A4UL, 0xECAFC61AUL, 0x23EC095DUL, 0x9B17BC8DUL),
+        SECP256K1_SCALAR_CONST(
+            0x5FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x860192D6UL, 0x81BB3C16UL, 0x67EEE374UL, 0xCE145879UL),
+        SECP256K1_SCALAR_CONST(
+            0x4FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0x9A56A508UL, 0x16C6B212UL, 0xABF1BD8CUL, 0x0110F465UL),
+        SECP256K1_SCALAR_CONST(
+            0x3FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xAEABB739UL, 0xABD2280EUL, 0xEFF497A3UL, 0x340D9051UL),
+        SECP256K1_SCALAR_CONST(
+            0x2FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xC300C96BUL, 0x40DD9E0BUL, 0x33F771BAUL, 0x670A2C3DUL),
+        SECP256K1_SCALAR_CONST(
+            0x1FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xD755DB9CUL, 0xD5E91407UL, 0x77FA4BD1UL, 0x9A06C829UL),
+        SECP256K1_SCALAR_CONST(
+            0x0FFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL,
+            0xEBAAEDCEUL, 0x6AF48A03UL, 0xBBFD25E8UL, 0xCD036415UL),
+    };
+
+    int data;
+    int extra_bits = k % 4;
+
+    *r = *a;
+    if (extra_bits) {
+        k -= extra_bits;
+        data = secp256k1_scalar_shr_int(r, extra_bits);
+        secp256k1_scalar_add(r, r, &lookup[data << (4 - extra_bits)]);
+    }
+
+    while (k > 0) {
+        k -= 4;
+        data = secp256k1_scalar_shr_int(r, 4);
+        secp256k1_scalar_add(r, r, &lookup[data]);
+    }
+
+    VERIFY_CHECK(k == 0);
+}
+
+SECP256K1_INLINE static int secp256k1_scalar_shr_zeros(secp256k1_scalar *r) {
+    int n, k = 0;
+
+    /* Ensure that we do not have more than 15 trailing zeros. */
+    while ((n = __builtin_ctz(r->d[0] | (1 << 15)))) {
+        k += n;
+        secp256k1_scalar_shr_int(r, n);
+    }
+
+    return k;
+}
+
+static int secp256k1_scalar_eea_inverse(secp256k1_scalar *r, const secp256k1_scalar *n) {
+    secp256k1_scalar u, v, i, j, acomp, negx;
+    secp256k1_scalar *a, *b, *x0, *x1, *tmp;
+    int ka, kb;
+
+    /* Zero is not invertible. */
+    if (secp256k1_scalar_is_zero(n)) {
+        secp256k1_scalar_set_int(r, 0);
+        return 0;
+    }
+
+    /**
+     * The extended Euclidean algorithm computes x, y and gcd(a, b) such that
+     *     a*x + b*y = gcd(a, b)
+     * If we use this algorithm with b = p, then we solve a*x + p*y = gcd(a, p)
+     * We note that:
+     * - The order p is prime, so gcd(a, p) = 1.
+     * - We compute modulo p, and y*p = 0 mod p.
+     * So the equation simplifies to a*x = 1, and x = a^-1.
+     */
+
+    /* a = n */
+    u = *n;
+    a = &u;
+
+    /* Because 2 is not a common factor between a and b, we can detect
+     * multiples of 2 using the LSB and eliminate them aggressively. */
+    ka = secp256k1_scalar_shr_zeros(a);
+
+    /* b = p - a */
+    secp256k1_scalar_negate(&v, a);
+    b = &v;
+
+    /* x0 = 1 */
+    secp256k1_scalar_set_int(&i, 1);
+    secp256k1_scalar_negate(&negx, &i);
+    x0 = &i;
+
+    /* x1 = 0 */
+    secp256k1_scalar_set_int(&j, 0);
+    x1 = &j;
+
+    if (secp256k1_scalar_is_one(a)) {
+        goto done;
+    }
+
+    /* For a and b, we use two's complement math and ensure no overflow happens. */
+    secp256k1_scalar_complement(&acomp, a);
+    goto bzero;
+
+    while (!secp256k1_scalar_is_one(a)) {
+        secp256k1_scalar_complement(&acomp, a);
+        secp256k1_scalar_negate(&negx, x0);
+
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(b, a) > 0);
+        do {
+            secp256k1_scalar_binadd(b, b, &acomp);
+
+bzero:
+            /* We ensure that a and b are odd, so b must be even after subtracting a. */
+            VERIFY_CHECK(secp256k1_scalar_is_even(b));
+            kb = secp256k1_scalar_shr_zeros(b);
+            secp256k1_scalar_add(x1, x1, &negx);
+            secp256k1_scalar_pow2_div(x1, x1, kb);
+        } while (secp256k1_scalar_cmp_var(b, a) > 0);
+
+        /* a and b can never be equal, so if we exited, it is because a > b. */
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(a, b) > 0);
+
+        /* In order to speed things up, we only swap pointers. */
+        tmp = a;
+        a = b;
+        b = tmp;
+
+        tmp = x0;
+        x0 = x1;
+        x1 = tmp;
+    }
+
+done:
+    secp256k1_scalar_pow2_div(r, x0, ka);
+    return 1;
 }
 #endif
 
 static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(USE_SCALAR_INV_BUILTIN)
+#if defined(EXHAUSTIVE_TEST_ORDER)
     secp256k1_scalar_inverse(r, x);
+#elif defined(USE_SCALAR_INV_BUILTIN)
+    secp256k1_scalar_eea_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
     secp256k1_num n, m;
@@ -246,6 +414,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
 #endif
 }
 
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a) {
+    return !(a->d[0] & 1);
+}
+#endif
+
 #ifdef USE_ENDOMORPHISM
 #if defined(EXHAUSTIVE_TEST_ORDER)
 /**
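The loop in secp256k1_scalar_eea_inverse is easiest to follow through its invariants: writing n' for the input with its ka trailing zero bits stripped, each pass keeps a = x0*n' and b = x1*n' (mod the group order). The goto bzero entry establishes the second invariant by folding the initial b = p - a into x1 = -x0, and once a reaches 1, x0 holds the inverse of n'; the final pow2_div by ka then corrects for the stripped bits. Below is a compact standalone sketch of the same subtract-and-halve idea over a small prime (hypothetical demo, not the patch's code; div2k and the modulus 1000003 are illustrative stand-ins):

    #include <stdint.h>
    #include <stdio.h>

    /* Divide x by 2^k mod the odd prime p: adding p to an odd x makes it
     * even without changing its residue (pow2_div plays this role above). */
    static uint64_t div2k(uint64_t x, int k, uint64_t p) {
        while (k--) {
            if (x & 1) x += p;
            x >>= 1;
        }
        return x;
    }

    int main(void) {
        const uint64_t p = 1000003; /* small odd prime, stand-in for the group order */
        uint64_t n = 123456;        /* value to invert; requires gcd(n, p) = 1 */
        uint64_t a = n, b = p, x0 = 1, x1 = 0;

        /* Invariants: a == x0*n (mod p) and b == x1*n (mod p). */
        while (a != 1 && b != 1) {
            while (!(a & 1)) { a >>= 1; x0 = div2k(x0, 1, p); }
            while (!(b & 1)) { b >>= 1; x1 = div2k(x1, 1, p); }
            if (a >= b) { a -= b; x0 = (x0 + p - x1) % p; }
            else        { b -= a; x1 = (x1 + p - x0) % p; }
        }
        printf("inverse = %llu\n", (unsigned long long)(a == 1 ? x0 : x1));
        /* sanity check: n * inverse mod p should print 1 */
        printf("check   = %llu\n", (unsigned long long)((n * (a == 1 ? x0 : x1)) % p));
        return 0;
    }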
diff --git a/src/scalar_low_impl.h b/src/scalar_low_impl.h
index ad81f378bf..c126d131ef 100644
--- a/src/scalar_low_impl.h
+++ b/src/scalar_low_impl.h
@@ -114,6 +114,10 @@ SECP256K1_INLINE static int secp256k1_scalar_eq(const secp256k1_scalar *a, const
     return *a == *b;
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_cmp_var(const secp256k1_scalar *a, const secp256k1_scalar *b) {
+    return (*a > *b) - (*a < *b);
+}
+
 static SECP256K1_INLINE void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag) {
     uint32_t mask0, mask1;
     mask0 = flag + ~((uint32_t)0);
diff --git a/src/tests.c b/src/tests.c
index 7edfa8a4fd..bc0b73098b 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1075,6 +1075,37 @@ void scalar_test(void) {
         CHECK(secp256k1_scalar_eq(&r1, &v0));
     }
 
+    {
+        /* Test division by powers of two */
+        int i;
+        secp256k1_scalar r, e, twoinv;
+
+        secp256k1_scalar_set_int(&twoinv, 2);
+        secp256k1_scalar_inverse_var(&twoinv, &twoinv);
+
+        e = s;
+        for (i = 0; i < 25; i++) {
+            secp256k1_scalar_pow2_div(&r, &s, i);
+            CHECK(secp256k1_scalar_eq(&r, &e));
+            secp256k1_scalar_mul(&e, &e, &twoinv);
+        }
+    }
+
+    {
+        /* Test cmp_var */
+        secp256k1_scalar negs;
+        int expected = 0;
+
+        secp256k1_scalar_negate(&negs, &s);
+        if (!secp256k1_scalar_is_zero(&s)) {
+            expected = secp256k1_scalar_is_high(&s) * 2 - 1;
+        }
+
+        CHECK(secp256k1_scalar_cmp_var(&s, &s) == 0);
+        CHECK(secp256k1_scalar_cmp_var(&negs, &negs) == 0);
+        CHECK(secp256k1_scalar_cmp_var(&s, &negs) == expected);
+        CHECK(secp256k1_scalar_cmp_var(&negs, &s) == -expected);
+    }
 }
 
 void run_scalar_tests(void) {
@@ -1119,6 +1150,42 @@ void run_scalar_tests(void) {
     CHECK(secp256k1_scalar_check_overflow(&overflowed));
     }
 
+    {
+        /* Test boundary conditions for division by powers of two */
+        int j;
+        secp256k1_scalar r, x, e, twoinv, powtwoinv, negone;
+
+        secp256k1_scalar_set_int(&negone, 1);
+        secp256k1_scalar_negate(&negone, &negone);
+
+        secp256k1_scalar_set_int(&twoinv, 2);
+        secp256k1_scalar_inverse_var(&twoinv, &twoinv);
+
+        secp256k1_scalar_set_int(&powtwoinv, 1);
+
+        for (i = 0; i < 4; i++) {
+            secp256k1_scalar_mul(&powtwoinv, &powtwoinv, &twoinv);
+
+            for (j = 0; j < (1 << i); j++) {
+                x = negone;
+                /* clear the lowest i bits */
+                x.d[0] ^= x.d[0] & ((1 << i) - 1);
+                /* set j in the lowest bits */
+                x.d[0] |= j;
+
+                /* Save it for later */
+                e = x;
+
+                /* Perform the division and check results. */
+                secp256k1_scalar_pow2_div(&r, &x, i + 1);
+                CHECK(!secp256k1_scalar_check_overflow(&r));
+
+                secp256k1_scalar_mul(&e, &e, &powtwoinv);
+                CHECK(secp256k1_scalar_eq(&r, &e));
+            }
+        }
+    }
+
     {
         /* Static test vectors.
          * These were reduced from ~10^12 random vectors based on comparison-decision
@@ -1132,9 +1199,7 @@ void run_scalar_tests(void) {
         secp256k1_scalar one;
         secp256k1_scalar r1;
         secp256k1_scalar r2;
-#if defined(USE_SCALAR_INV_NUM)
         secp256k1_scalar zzv;
-#endif
         int overflow;
         unsigned char chal[33][2][32] = {
             {{0xff, 0xff, 0x03, 0x07, 0x00, 0x00, 0x00, 0x00,
@@ -1684,10 +1749,8 @@ void run_scalar_tests(void) {
             if (!secp256k1_scalar_is_zero(&y)) {
                 secp256k1_scalar_inverse(&zz, &y);
                 CHECK(!secp256k1_scalar_check_overflow(&zz));
-#if defined(USE_SCALAR_INV_NUM)
                 secp256k1_scalar_inverse_var(&zzv, &y);
                 CHECK(secp256k1_scalar_eq(&zzv, &zz));
-#endif
                 secp256k1_scalar_mul(&z, &z, &zz);
                 CHECK(!secp256k1_scalar_check_overflow(&z));
                 CHECK(secp256k1_scalar_eq(&x, &z));
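For reference, the 16 constants in secp256k1_scalar_pow2_div's table are i * 16^-1 mod the group order for i = 0..15: secp256k1_scalar_shr_int shifts out data = r mod 16 and returns it, and adding lookup[data] restores the exact division by 16. A sketch of how one might regenerate or audit those constants using the library's internal scalar API (hypothetical helper, not part of the patch; it needs <stdio.h> and must be compiled where the internal scalar functions are visible):

    /* Print the 16 table entries, entry i = i * 16^-1 mod the group order. */
    static void print_pow2_div_table(void) {
        secp256k1_scalar inv16, entry;
        unsigned char b[32];
        int i, j;

        secp256k1_scalar_set_int(&inv16, 16);
        secp256k1_scalar_inverse_var(&inv16, &inv16);

        for (i = 0; i < 16; i++) {
            secp256k1_scalar_set_int(&entry, i);
            secp256k1_scalar_mul(&entry, &entry, &inv16);
            secp256k1_scalar_get_b32(b, &entry);
            for (j = 0; j < 32; j++) {
                printf("%02X", b[j]);
            }
            printf("\n");
        }
    }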