@@ -222,6 +222,7 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *x) {
 #endif
 }
 
+#if !defined(EXHAUSTIVE_TEST_ORDER)
 static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
     static const secp256k1_scalar lookup[16] = {
         SECP256K1_SCALAR_CONST(
@@ -293,9 +294,108 @@ static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
     VERIFY_CHECK(k == 0);
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_shr_zeros(secp256k1_scalar *r) {
+    int n, k = 0;
+
+    /* Cap each pass at 15 trailing zeros: ORing in bit 15 keeps the ctz
+     * argument nonzero and within what secp256k1_scalar_shr_int can shift. */
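+    /* Illustrative example (not part of the algorithm): if d[0] ends in
+     * binary ...0100, ctz reports 2 and two zeros are shifted out; the loop
+     * stops once the low bit is set. */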
+    while ((n = __builtin_ctz(r->d[0] | (1 << 15)))) {
+        k += n;
+        secp256k1_scalar_shr_int(r, n);
+    }
+
+    return k;
+}
+
+static int secp256k1_scalar_eea_inverse(secp256k1_scalar *r, const secp256k1_scalar *n) {
+    secp256k1_scalar u, v, i, j, acomp, negx;
+    secp256k1_scalar *a, *b, *x0, *x1, *tmp;
+    int ka, kb;
+
+    /* Zero is not invertible. */
+    if (secp256k1_scalar_is_zero(n)) {
+        secp256k1_scalar_set_int(r, 0);
+        return 0;
+    }
+
+    /**
+     * The extended Euclidean algorithm computes x, y and gcd(a, b) such that
+     *     a*x + b*y = gcd(a, b)
+     * If we run this algorithm with b = p, we solve a*x + p*y = gcd(a, p).
+     * We note that:
+     *  - The order p is prime, so gcd(a, p) = 1.
+     *  - We compute modulo p, and y*p = 0 mod p.
+     * So the equation simplifies to a*x = 1, i.e. x = a^-1.
+     */
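+    /* A tiny worked instance (illustrative, not part of the algorithm):
+     * with p = 11 and a = 3, one finds 3*4 + 11*(-1) = 1, so x = 4 = 3^-1. */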
+
+    /* a = n */
+    u = *n;
+    a = &u;
+
+    /* Because 2 is not a common factor between a and b, we can detect
+     * multiples of 2 using the LSB and eliminate them aggressively. */
+    ka = secp256k1_scalar_shr_zeros(a);
+
+    /* b = p - a */
+    secp256k1_scalar_negate(&v, a);
+    b = &v;
+
+    /* x0 = 1 */
+    secp256k1_scalar_set_int(&i, 1);
+    secp256k1_scalar_negate(&negx, &i);
+    x0 = &i;
+
+    /* x1 = 0 */
+    secp256k1_scalar_set_int(&j, 0);
+    x1 = &j;
+
+    if (secp256k1_scalar_is_one(a)) {
+        goto done;
+    }
+
+    /* For a and b, we use two's complement math and ensure no overflow happens. */
+    secp256k1_scalar_complement(&acomp, a);
+    goto bzero;
+
+    while (!secp256k1_scalar_is_one(a)) {
+        secp256k1_scalar_complement(&acomp, a);
+        secp256k1_scalar_negate(&negx, x0);
+
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(b, a) > 0);
+        do {
+            secp256k1_scalar_binadd(b, b, &acomp);
+
+bzero:
+            /* We ensure that a and b are odd, so b must be even after subtracting a. */
+            VERIFY_CHECK(secp256k1_scalar_is_even(b));
+            kb = secp256k1_scalar_shr_zeros(b);
+            secp256k1_scalar_add(x1, x1, &negx);
+            secp256k1_scalar_pow2_div(x1, x1, kb);
+        } while (secp256k1_scalar_cmp_var(b, a) > 0);
+
+        /* a and b can never be equal, so if we exited, it is because a > b. */
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(a, b) > 0);
+
+        /* In order to speed things up, we only swap pointers. */
+        tmp = a;
+        a = b;
+        b = tmp;
+
+        tmp = x0;
+        x0 = x1;
+        x1 = tmp;
+    }
+
+done:
+    secp256k1_scalar_pow2_div(r, x0, ka);
+    return 1;
+}
+#endif
+
 static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(USE_SCALAR_INV_BUILTIN)
+#if defined(EXHAUSTIVE_TEST_ORDER)
     secp256k1_scalar_inverse(r, x);
+#elif defined(USE_SCALAR_INV_BUILTIN)
+    secp256k1_scalar_eea_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
     secp256k1_num n, m;
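
For intuition, here is a toy re-implementation of the same odd-only extended-GCD inverse on plain uint64_t values, with the goto/complement control flow unrolled into ordinary loops and modular halving standing in for secp256k1_scalar_pow2_div. It is a sketch, not the library code: eea_inverse_u64, half_mod and the prime 1000003 are all illustrative, and it assumes an odd prime p below 2^63.

#include <stdint.h>
#include <stdio.h>

/* Halve x modulo an odd p: if x is odd, x + p is even (assumes p < 2^63). */
static uint64_t half_mod(uint64_t x, uint64_t p) {
    return (x & 1) ? (x + p) >> 1 : x >> 1;
}

/* Toy modular inverse by binary extended GCD, mirroring the structure of
 * secp256k1_scalar_eea_inverse: keep a odd, subtract it from b, and halve
 * out the factors of two while tracking the Bezout coefficients mod p. */
static uint64_t eea_inverse_u64(uint64_t n, uint64_t p) {
    uint64_t a = n % p, b, x0 = 1, x1, t;
    int ka = 0;

    if (a == 0) return 0; /* zero is not invertible */

    /* Strip trailing zeros from a; undone by ka halvings at the end. */
    while ((a & 1) == 0) { a >>= 1; ka++; }

    /* b = p - a = a * (-1) mod p, so start with x1 = -1; b is even. */
    b = p - a;
    x1 = p - 1;
    while ((b & 1) == 0) { b >>= 1; x1 = half_mod(x1, p); }

    /* Invariants: a == n'*x0 and b == n'*x1 (mod p), n' the stripped input. */
    while (a != 1) {
        while (b > a) {
            b -= a;                 /* both odd, so b becomes even */
            x1 = (x1 + p - x0) % p; /* x1 -= x0 (mod p) */
            while ((b & 1) == 0) { b >>= 1; x1 = half_mod(x1, p); }
        }
        /* Now b < a (equality would contradict gcd(a, b) = 1): swap roles. */
        t = a; a = b; b = t;
        t = x0; x0 = x1; x1 = t;
    }

    /* a == 1 == n'*x0 (mod p); compensate the initial stripping of n. */
    while (ka--) x0 = half_mod(x0, p);
    return x0;
}

int main(void) {
    uint64_t p = 1000003, n = 123456; /* small odd prime, arbitrary n */
    uint64_t inv = eea_inverse_u64(n, p);
    printf("%llu (should be 1)\n", (unsigned long long)(n * inv % p));
    return 0;
}

The invariants are the ones the patch relies on: a == n'*x0 and b == n'*x1 (mod p), where n' is the input with its trailing zero bits stripped; once a reaches 1, x0 is the inverse of n', and the final ka halvings turn it into the inverse of n.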