Skip to content

Commit f214a04

Browse files
committed
ed25519: review add in avx512
1 parent ed2a927 commit f214a04

File tree

7 files changed

+106
-17
lines changed

7 files changed

+106
-17
lines changed

src/ballet/ed25519/avx512/fd_curve25519.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,12 @@ fd_ed25519_point_neg( fd_ed25519_point_t * r,
9797
_p03 = wwl( 8796093022189L, 8796093022189L, 8796093022189L, 8796093022189L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
9898
_p14 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
9999
_p25 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 1099511627775L, 1099511627775L, 1099511627775L, 1099511627775L );
100-
FD_R43X6_QUAD_LANE_SUB_FAST( r->P, a->P, 1,0,0,1, _p, a->P );
101-
FD_R43X6_QUAD_FOLD_UNSIGNED( r->P, r->P );
100+
// FD_R43X6_QUAD_LANE_SUB_FAST( r->P, a->P, 1,0,0,1, _p, a->P );
101+
// FD_R43X6_QUAD_FOLD_UNSIGNED( r->P, r->P );
102+
int _mask = 0x99; /* 1001 1001 */
103+
r->P03 = wwv_sub_if( _mask, _p03, a->P03, a->P03 );
104+
r->P14 = wwv_sub_if( _mask, _p14, a->P14, a->P14 );
105+
r->P25 = wwv_sub_if( _mask, _p25, a->P25, a->P25 );
102106
return r;
103107
}
104108

src/ballet/ed25519/avx512/fd_f25519.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ fd_f25519_mul_121666( fd_f25519_t * r,
9696

9797
/* fd_f25519_frombytes deserializes a 32-byte buffer buf into a
9898
fd_f25519_t element r, and returns r.
99-
buf is in little endian form, according to RFC 8032. */
99+
buf is in little endian form; we accept non-canonical elements
100+
unlike RFC 8032. */
100101
FD_25519_INLINE fd_f25519_t *
101102
fd_f25519_frombytes( fd_f25519_t * r,
102103
uchar const buf[ 32 ] ) {
@@ -110,7 +111,8 @@ fd_f25519_frombytes( fd_f25519_t * r,
110111

111112
/* fd_f25519_tobytes serializes a fd_f25519_t element a into
112113
a 32-byte buffer out, and returns out.
113-
out is in little endian form, according to RFC 8032. */
114+
out is in little endian form, according to RFC 8032
115+
(we don't output non-canonical elements). */
114116
FD_25519_INLINE uchar *
115117
fd_f25519_tobytes( uchar out[ 32 ],
116118
fd_f25519_t const * a ) {

src/ballet/ed25519/avx512/fd_r43x6_ge.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,19 @@ fd_r43x6_ge_is_eq( wwl_t X03, wwl_t X14, wwl_t X25,
125125
FD_R43X6_QUAD_PERMUTE ( _tb, 1,0,2,3, P2 ); /* _tb = (Y2, X2, Z2, T2 ), s61|s61|s61|s61 */ \
126126
FD_R43X6_QUAD_LANE_SUB_FAST( _ta, _ta, 1,0,0,0, _ta, P1 ); /* _ta = (Y1-X1,X1, Z1, T1 ), s62|s61|s61|s61 */ \
127127
FD_R43X6_QUAD_LANE_SUB_FAST( _tb, _tb, 1,0,0,0, _tb, P2 ); /* _tb = (Y2-X2,X2, Z2, T2 ), s62|s61|s61|s61 */ \
128-
FD_R43X6_QUAD_LANE_ADD_FAST( _ta, _ta, 0,1,1,0, _ta, P1 ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1 ), s62|s62|s61|s61 */ \
128+
FD_R43X6_QUAD_LANE_ADD_FAST( _ta, _ta, 0,1,1,0, _ta, P1 ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1 ), s62|s62|s62|s61 */ \
129129
FD_R43X6_QUAD_LANE_ADD_FAST( _tb, _tb, 0,1,0,0, _tb, P2 ); /* _tb = (Y2-X2,Y2+X2,Z2, T2 ), s62|s62|s61|s61 */ \
130-
FD_R43X6_QUAD_MUL_FAST ( _ta, _ta, _tb ); /* _ta = (A, B, D, C ), u62|u62|u62|u62 */ \
131-
FD_R43X6_QUAD_FOLD_UNSIGNED( _ta, _ta ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1*2d), u44|u44|u44|u44 */ \
130+
FD_R43X6_QUAD_FOLD_SIGNED ( _ta, _ta ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1 ), u44|u44|u44|u44 */ \
131+
FD_R43X6_QUAD_FOLD_SIGNED ( _tb, _tb ); /* _tb = (Y2-X2,Y2+X2,Z2, T2 ), u44|u44|u44|u44 */ \
132132
FD_R43X6_QUAD_MUL_FAST ( _ta, _ta, _1112d ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1*2d), u62|u62|u62|u62 */ \
133+
FD_R43X6_QUAD_FOLD_UNSIGNED( _ta, _ta ); /* _ta = (Y1-X1,Y1+X1,Z1*2, T1*2d), u44|u44|u44|u44 */ \
134+
FD_R43X6_QUAD_MUL_FAST ( _ta, _ta, _tb ); /* _ta = (A, B, D, C ), u62|u62|u62|u62 */ \
135+
/* the next line can't be removed because in 3 lines we'd get s62|u63|u63|s62 and that's not ok for fold_signed */ \
133136
FD_R43X6_QUAD_FOLD_UNSIGNED( _ta, _ta ); /* _ta = (A, B, D, C ), u44|u44|u44|u44 */ \
134-
FD_R43X6_QUAD_PERMUTE ( _tb, 1,0,3,2, _ta ); /* _tb = (B, A, C, D ), u62|u62|u62|u62 */ \
135-
FD_R43X6_QUAD_LANE_SUB_FAST( _tb, _tb, 1,0,0,1, _tb, _ta ); /* _tb = (E, A, C, F ), s62|u62|u62|s62 */ \
136-
FD_R43X6_QUAD_LANE_ADD_FAST( _tb, _tb, 0,1,1,0, _tb, _ta ); /* _tb = (E, H, G, F ), s62|u63|u63|s62 */ \
137+
FD_R43X6_QUAD_PERMUTE ( _tb, 1,0,3,2, _ta ); /* _tb = (B, A, C, D ), u44|u44|u44|u44 */ \
138+
FD_R43X6_QUAD_LANE_SUB_FAST( _tb, _tb, 1,0,0,1, _tb, _ta ); /* _tb = (E, A, C, F ), s44|u44|u44|s44 */ \
139+
FD_R43X6_QUAD_LANE_ADD_FAST( _tb, _tb, 0,1,1,0, _tb, _ta ); /* _tb = (E, H, G, F ), s44|u45|u45|s44 */ \
140+
FD_R43X6_QUAD_FOLD_SIGNED ( _tb, _tb ); /* _tb = (E, H, G, F ), u44|u44|u44|u44 */ \
137141
FD_R43X6_QUAD_PERMUTE ( _ta, 0,2,2,0, _tb ); /* _ta = (E, G, G, E ), u44|u44|u44|u44 */ \
138142
FD_R43X6_QUAD_PERMUTE ( _tb, 3,1,3,1, _tb ); /* _tb = (F, H, F, H ), u44|u44|u44|u44 */ \
139143
FD_R43X6_QUAD_MUL_FAST ( _ta, _ta, _tb ); /* _ta = (X3, Y3, Z3, T3 ), u62|u62|u62|u62 */ \

src/ballet/ed25519/fd_curve25519.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,15 +179,15 @@ fd_ed25519_multi_scalar_mul_base( fd_ed25519_point_t * r,
179179

180180
/* fd_ed25519_point_frombytes deserializes a 32-byte buffer buf into a
181181
point r, and returns r (on success, NULL on error).
182-
buf is in little endian form, according to RFC 8032.
182+
buf is in little endian form; unlike RFC 8032, non-canonical points are accepted.
183183
Cost: 1sqrt ~= 1inv ~= 250mul */
184184
fd_ed25519_point_t *
185185
fd_ed25519_point_frombytes( fd_ed25519_point_t * r,
186186
uchar const buf[ 32 ] );
187187

188188
/* fd_ed25519_point_frombytes_2x deserializes 2x 32-byte buffers buf1, buf2
189189
resp. into points r1, r2.
190-
buf1, buf2 are in little endian form, according to RFC 8032.
190+
buf1, buf2 are in little endian form; unlike RFC 8032, non-canonical points are accepted.
191191
It returns 0 on success, 1 or 2 on failure.
192192
Cost: 2sqrt (executed concurrently if possible) */
193193
int
@@ -208,7 +208,8 @@ fd_ed25519_point_validate(uchar const buf[ 32 ] ) {
208208

209209
/* fd_ed25519_point_tobytes serializes a point a into
210210
a 32-byte buffer out, and returns out.
211-
out is in little endian form, according to RFC 8032. */
211+
out is in little endian form, according to RFC 8032
212+
(we don't output non-canonical points). */
212213
uchar *
213214
fd_ed25519_point_tobytes( uchar out[ 32 ],
214215
fd_ed25519_point_t const * a );

src/ballet/ed25519/fd_f25519.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,16 @@ fd_f25519_mul_121666( fd_f25519_t * r,
9494

9595
/* fd_f25519_frombytes deserializes a 32-byte buffer buf into a
9696
fd_f25519_t element r, and returns r.
97-
buf is in little endian form, according to RFC 8032. */
97+
buf is in little endian form; we accept non-canonical elements
98+
unlike RFC 8032. */
9899
fd_f25519_t *
99100
fd_f25519_frombytes( fd_f25519_t * r,
100101
uchar const buf[ 32 ] );
101102

102103
/* fd_f25519_tobytes serializes a fd_f25519_t element a into
103104
a 32-byte buffer out, and returns out.
104-
out is in little endian form, according to RFC 8032. */
105+
out is in little endian form, according to RFC 8032
106+
(we don't output non-canonical elements). */
105107
uchar *
106108
fd_f25519_tobytes( uchar out[ 32 ],
107109
fd_f25519_t const * a );

src/ballet/ed25519/ref/fd_f25519.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ fd_f25519_mul_121666( fd_f25519_t * r,
107107

108108
/* fd_f25519_frombytes deserializes a 32-byte buffer buf into a
109109
fd_f25519_t element r, and returns r.
110-
buf is in little endian form, according to RFC 8032. */
110+
buf is in little endian form; we accept non-canonical elements
111+
unlike RFC 8032. */
111112
FD_25519_INLINE fd_f25519_t *
112113
fd_f25519_frombytes( fd_f25519_t * r,
113114
uchar const buf[ 32 ] ) {
@@ -117,7 +118,8 @@ fd_f25519_frombytes( fd_f25519_t * r,
117118

118119
/* fd_f25519_tobytes serializes a fd_f25519_t element a into
119120
a 32-byte buffer out, and returns out.
120-
out is in little endian form, according to RFC 8032. */
121+
out is in little endian form, according to RFC 8032
122+
(we don't output non-canonical elements). */
121123
FD_25519_INLINE uchar *
122124
fd_f25519_tobytes( uchar out[ 32 ],
123125
fd_f25519_t const * a ) {

src/ballet/ed25519/test_ed25519.c

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,10 @@ test_point_validate( FD_PARAM_UNUSED fd_rng_t * rng ) {
701701
fd_hex_decode( buf, "f0ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", 32 );
702702
FD_TEST_CUSTOM( fd_ed25519_point_validate( buf ), "fd_ed25519_point_validate(01..00)" );
703703

704+
// non-canonical points are accepted
705+
fd_hex_decode( buf, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 32 );
706+
FD_TEST_CUSTOM( fd_ed25519_point_validate( buf ), "fd_ed25519_point_validate(ff..ff)" );
707+
704708
/* negative tests */
705709

706710
fd_hex_decode( buf, "0200000000000000000000000000000000000000000000000000000000000000", 32 );
@@ -710,6 +714,50 @@ test_point_validate( FD_PARAM_UNUSED fd_rng_t * rng ) {
710714
FD_TEST_CUSTOM( !fd_ed25519_point_validate( buf ), "!fd_ed25519_point_validate(02..00)" );
711715
}
712716

717+
static void
718+
test_point_frombytes( FD_PARAM_UNUSED fd_rng_t * rng ) {
719+
uchar _bufa[32]; uchar * bufa = _bufa;
720+
uchar _bufr[32]; uchar * bufr = _bufr;
721+
uchar _bufx[32]; uchar * bufx = _bufx;
722+
uchar _bufy[32]; uchar * bufy = _bufy;
723+
724+
fd_f25519_t x[1], y[1], z[1], t[1];
725+
fd_ed25519_point_t a[1];
726+
727+
{
728+
fd_hex_decode( bufa, "ffffffffffff0100fffffffffffffffffffffffffffdffffffffffffffffffff", 32 );
729+
fd_hex_decode( bufx, "3d0f773c2d26e69aa19258013f0bb4eb72a8db858498e6c802089ca8972b101b", 32 );
730+
fd_hex_decode( bufy, "ffffffffffff0100fffffffffffffffffffffffffffdffffffffffffffffff7f", 32 );
731+
732+
FD_TEST( fd_ed25519_point_frombytes( a, bufa ) );
733+
734+
fd_ed25519_point_tobytes( bufr, a );
735+
FD_TEST( fd_memeq( bufr, bufa, 32UL ) );
736+
737+
fd_ed25519_point_to( x, y, z, t, a );
738+
fd_f25519_tobytes( bufr, x );
739+
FD_TEST( fd_memeq( bufr, bufx, 32UL ) );
740+
fd_f25519_tobytes( bufr, y );
741+
FD_TEST( fd_memeq( bufr, bufy, 32UL ) );
742+
}
743+
{
744+
fd_hex_decode( bufa, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 32 );
745+
fd_hex_decode( bufx, "c50fe3127abac974ddd36f74e8988d7fe71cfc79d15fce531b42ccafd973d348", 32 );
746+
fd_hex_decode( bufy, "1200000000000000000000000000000000000000000000000000000000000000", 32 );
747+
748+
FD_TEST( fd_ed25519_point_frombytes( a, bufa ) );
749+
750+
fd_ed25519_point_tobytes( bufr, a );
751+
FD_TEST( !fd_memeq( bufr, bufa, 32UL ) ); // non-canonical
752+
753+
fd_ed25519_point_to( x, y, z, t, a );
754+
fd_f25519_tobytes( bufr, x );
755+
FD_TEST( fd_memeq( bufr, bufx, 32UL ) );
756+
fd_f25519_tobytes( bufr, y );
757+
FD_TEST( fd_memeq( bufr, bufy, 32UL ) );
758+
}
759+
}
760+
713761
static void
714762
test_point_sub( fd_rng_t * rng FD_PARAM_UNUSED ) {
715763
uchar _bufa[32]; uchar * bufa = _bufa;
@@ -764,6 +812,31 @@ test_point_sub( fd_rng_t * rng FD_PARAM_UNUSED ) {
764812
fd_ed25519_point_sub( r, a, b );
765813
fd_ed25519_point_tobytes( bufr, r );
766814

815+
FD_TEST( fd_memeq( bufr, bufe, 32UL ) );
816+
}
817+
{
818+
// regression: point_sub used to fail on this input (b is a non-canonical point)
819+
fd_hex_decode( bufa, "0100000000000000000000000000000000b90000000000000000000000000080", 32 );
820+
fd_hex_decode( bufb, "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", 32 );
821+
fd_hex_decode( bufe, "39b4ef21660663d8955e024b1a7d921cf76b6300dbd94827d47ec62829a7dddc", 32 );
822+
823+
FD_TEST( fd_ed25519_point_frombytes( a, bufa ) );
824+
FD_TEST( fd_ed25519_point_frombytes( b, bufb ) );
825+
826+
FD_TEST( fd_ed25519_point_frombytes( e, bufe ) );
827+
{
828+
fd_ed25519_point_tobytes( bufr, a );
829+
FD_TEST( fd_memeq( bufr, bufa, 32UL ) );
830+
fd_ed25519_point_tobytes( bufr, b );
831+
FD_TEST( !fd_memeq( bufr, bufb, 32UL ) ); // non-canonical
832+
}
833+
834+
fd_ed25519_point_sub( r, a, b );
835+
fd_ed25519_point_tobytes( bufr, r );
836+
837+
// FD_LOG_HEXDUMP_WARNING(( "bufr", bufr, 32 ));
838+
// FD_LOG_HEXDUMP_WARNING(( "bufe", bufe, 32 ));
839+
767840
FD_TEST( fd_memeq( bufr, bufe, 32UL ) );
768841
}
769842
}
@@ -1225,6 +1298,7 @@ main( int argc,
12251298
test_affine_is_small_order ( rng );
12261299

12271300
test_point_validate( rng );
1301+
test_point_frombytes( rng );
12281302
test_point_sub( rng );
12291303

12301304
test_sc_validate ( rng );

0 commit comments

Comments
 (0)