@@ -222,7 +222,8 @@ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar
 #endif
 }
 
-SECP256K1_INLINE static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
+#if !defined(EXHAUSTIVE_TEST_ORDER)
+static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, const secp256k1_scalar *a, int k) {
     static const secp256k1_scalar lookup[15] = {
         SECP256K1_SCALAR_CONST(
             0xEFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFFUL, 0xFFFFFFFEUL,
@@ -297,9 +298,110 @@ SECP256K1_INLINE static void secp256k1_scalar_pow2_div(secp256k1_scalar *r, cons
     VERIFY_CHECK(k == 0);
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_shr_zeros(secp256k1_scalar *r) {
+    int n, k = 0;
+
+    /* Ensure that we do not shift out more than 15 trailing zeros at a time. */
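+    /* ORing with (1 << 15) caps __builtin_ctz's result at 15, the largest
+     * shift secp256k1_scalar_shr_int accepts in a single call. */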
+    while ((n = __builtin_ctz(r->d[0] | (1 << 15)))) {
+        k += n;
+        secp256k1_scalar_shr_int(r, n);
+    }
+
+    return k;
+}
+
+static int secp256k1_scalar_eea_inverse(secp256k1_scalar *r, const secp256k1_scalar *n) {
+    secp256k1_scalar u, v, i, j, acomp, negx;
+    secp256k1_scalar *a, *b, *x0, *x1, *tmp;
+    int ka, kb;
+
+    /* zero is not invertible */
+    if (secp256k1_scalar_is_zero(n)) {
+        secp256k1_scalar_set_int(r, 0);
+        return 0;
+    }
+
+    /**
+     * The extended Euclidean algorithm computes x, y and gcd(a, b) such that
+     *     a*x + b*y = gcd(a, b)
+     * If we run this algorithm with b = p, then we solve a*x + p*y = gcd(a, p).
+     * We note that:
+     *   - The order p is prime, so gcd(a, p) = 1.
+     *   - We compute modulo p, so y*p = 0 mod p.
+     * The equation therefore simplifies to a*x = 1, i.e. x = a^-1.
+     */
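+    /* Toy example with p = 13, a = 5: 5*8 + 13*(-3) = 1, so x = 8 is the
+     * inverse of 5 modulo 13. */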
+
+    /* a = n */
+    u = *n;
+    a = &u;
+
+    /* Because 2 is not a common factor of a and b, we can detect factors
+     * of 2 using the LSB and eliminate them aggressively. */
+    ka = secp256k1_scalar_shr_zeros(a);
+
+    /* b = p - a */
+    secp256k1_scalar_negate(&v, a);
+    b = &v;
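+    /* p is odd and a is now odd, so b = p - a is even, and b == -a (mod p). */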
+
+    /* x0 = 1 */
+    secp256k1_scalar_set_int(&i, 1);
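+    /* negx = -x0 is precomputed here because the first pass enters the loop
+     * below at bzero, past the point where negx is normally refreshed. */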
+    secp256k1_scalar_negate(&negx, &i);
+    x0 = &i;
+
+    /* x1 = 0 */
+    secp256k1_scalar_set_int(&j, 0);
+    x1 = &j;
+
+    if (secp256k1_scalar_is_one(a)) {
+        goto done;
+    }
+
+    /* For a and b, we use two's complement math and ensure no overflow happens. */
+    secp256k1_scalar_complement(&acomp, a);
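+    /* b already holds p - a, the result of the loop's first subtraction, so
+     * enter the loop past the binadd. */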
+    goto bzero;
+
+    while (!secp256k1_scalar_is_one(a)) {
+        secp256k1_scalar_complement(&acomp, a);
+        secp256k1_scalar_negate(&negx, x0);
+
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(b, a) > 0);
+        do {
+            secp256k1_scalar_binadd(b, b, &acomp);
+
+    bzero:
+            /* a and b are both odd, so b must be even after subtracting a. */
+            VERIFY_CHECK(secp256k1_scalar_is_even(b));
+
+            secp256k1_scalar_add(x1, x1, &negx);
+
+            kb = secp256k1_scalar_shr_zeros(b);
+            secp256k1_scalar_pow2_div(x1, x1, kb);
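+
+            /* Each round effectively computes b <- (b - a)/2^kb and
+             * x1 <- (x1 - x0)/2^kb, which preserves the invariant
+             * x1*(n/2^ka) == b (mod p). */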
+        } while (secp256k1_scalar_cmp_var(b, a) > 0);
+
+        /* a and b can never be equal, so if we exited, it is because a > b. */
+        VERIFY_CHECK(secp256k1_scalar_cmp_var(a, b) > 0);
+
+        /* In order to speed things up, we only swap pointers. */
+        tmp = a;
+        a = b;
+        b = tmp;
+
+        tmp = x0;
+        x0 = x1;
+        x1 = tmp;
+    }
+
+done:
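+    /* x0 inverts n with its trailing zeros stripped, i.e. n/2^ka, so divide
+     * by 2^ka once more to recover n^-1. */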
+    secp256k1_scalar_pow2_div(r, x0, ka);
+    return 1;
+}
+#endif
+
 static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_scalar *x) {
-#if defined(USE_SCALAR_INV_BUILTIN)
+#if defined(EXHAUSTIVE_TEST_ORDER)
     secp256k1_scalar_inverse(r, x);
+#elif defined(USE_SCALAR_INV_BUILTIN)
+    secp256k1_scalar_eea_inverse(r, x);
 #elif defined(USE_SCALAR_INV_NUM)
     unsigned char b[32];
     secp256k1_num n, m;