Skip to content

Commit a1e34a7

Browse files
sipa authored and deadalnix committed
Fix Jacobi benchmarks and other benchmark improvements
Summary: * Rename bench_internal variables. The _x and _y suffixes are confusing; they don't actually correspond to X and Y coordinates. Instead replace them with arrays. * Randomize the Z coordinates in bench_internal. Also increase the number of fe inputs. * Make jacobi benchmarks vary inputs. Also make the num_jacobi benchmark use the scalar order as modulus, instead of a random number. * Add benchmark for secp256k1_ge_set_gej_var. This is a backport of secp256k1 [[bitcoin-core/secp256k1#797 | PR797]]. Test Plan: ninja bench-secp256k1 Reviewers: #bitcoin_abc, jasonbcox Reviewed By: #bitcoin_abc, jasonbcox Differential Revision: https://reviews.bitcoinabc.org/D7612
1 parent aa004f4 commit a1e34a7

File tree

1 file changed

+113
-60
lines changed

1 file changed

+113
-60
lines changed

src/bench_internal.c

Lines changed: 113 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -19,49 +19,72 @@
1919
#include "secp256k1.c"
2020

2121
typedef struct {
22-
secp256k1_scalar scalar_x, scalar_y;
23-
secp256k1_fe fe_x, fe_y;
24-
secp256k1_ge ge_x, ge_y;
25-
secp256k1_gej gej_x, gej_y;
22+
secp256k1_scalar scalar[2];
23+
secp256k1_fe fe[4];
24+
secp256k1_ge ge[2];
25+
secp256k1_gej gej[2];
2626
unsigned char data[64];
2727
int wnaf[256];
2828
} bench_inv;
2929

3030
void bench_setup(void* arg) {
3131
bench_inv *data = (bench_inv*)arg;
3232

33-
static const unsigned char init_x[32] = {
34-
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
35-
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
36-
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
37-
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
33+
static const unsigned char init[4][32] = {
34+
/* Initializer for scalar[0], fe[0], first half of data, the X coordinate of ge[0],
35+
and the (implied affine) X coordinate of gej[0]. */
36+
{
37+
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
38+
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
39+
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
40+
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
41+
},
42+
/* Initializer for scalar[1], fe[1], second half of data, the X coordinate of ge[1],
43+
and the (implied affine) X coordinate of gej[1]. */
44+
{
45+
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
46+
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
47+
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
48+
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
49+
},
50+
/* Initializer for fe[2] and the Z coordinate of gej[0]. */
51+
{
52+
0x3d, 0x2d, 0xef, 0xf4, 0x25, 0x98, 0x4f, 0x5d,
53+
0xe2, 0xca, 0x5f, 0x41, 0x3f, 0x3f, 0xce, 0x44,
54+
0xaa, 0x2c, 0x53, 0x8a, 0xc6, 0x59, 0x1f, 0x38,
55+
0x38, 0x23, 0xe4, 0x11, 0x27, 0xc6, 0xa0, 0xe7
56+
},
57+
/* Initializer for fe[3] and the Z coordinate of gej[1]. */
58+
{
59+
0xbd, 0x21, 0xa5, 0xe1, 0x13, 0x50, 0x73, 0x2e,
60+
0x52, 0x98, 0xc8, 0x9e, 0xab, 0x00, 0xa2, 0x68,
61+
0x43, 0xf5, 0xd7, 0x49, 0x80, 0x72, 0xa7, 0xf3,
62+
0xd7, 0x60, 0xe6, 0xab, 0x90, 0x92, 0xdf, 0xc5
63+
}
3864
};
3965

40-
static const unsigned char init_y[32] = {
41-
0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
42-
0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
43-
0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
44-
0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
45-
};
46-
47-
secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL);
48-
secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL);
49-
secp256k1_fe_set_b32(&data->fe_x, init_x);
50-
secp256k1_fe_set_b32(&data->fe_y, init_y);
51-
CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0));
52-
CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1));
53-
secp256k1_gej_set_ge(&data->gej_x, &data->ge_x);
54-
secp256k1_gej_set_ge(&data->gej_y, &data->ge_y);
55-
memcpy(data->data, init_x, 32);
56-
memcpy(data->data + 32, init_y, 32);
66+
secp256k1_scalar_set_b32(&data->scalar[0], init[0], NULL);
67+
secp256k1_scalar_set_b32(&data->scalar[1], init[1], NULL);
68+
secp256k1_fe_set_b32(&data->fe[0], init[0]);
69+
secp256k1_fe_set_b32(&data->fe[1], init[1]);
70+
secp256k1_fe_set_b32(&data->fe[2], init[2]);
71+
secp256k1_fe_set_b32(&data->fe[3], init[3]);
72+
CHECK(secp256k1_ge_set_xo_var(&data->ge[0], &data->fe[0], 0));
73+
CHECK(secp256k1_ge_set_xo_var(&data->ge[1], &data->fe[1], 1));
74+
secp256k1_gej_set_ge(&data->gej[0], &data->ge[0]);
75+
secp256k1_gej_rescale(&data->gej[0], &data->fe[2]);
76+
secp256k1_gej_set_ge(&data->gej[1], &data->ge[1]);
77+
secp256k1_gej_rescale(&data->gej[1], &data->fe[3]);
78+
memcpy(data->data, init[0], 32);
79+
memcpy(data->data + 32, init[1], 32);
5780
}
5881

5982
void bench_scalar_add(void* arg, int iters) {
6083
int i, j = 0;
6184
bench_inv *data = (bench_inv*)arg;
6285

6386
for (i = 0; i < iters; i++) {
64-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
87+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
6588
}
6689
CHECK(j <= iters);
6790
}
@@ -71,7 +94,7 @@ void bench_scalar_negate(void* arg, int iters) {
7194
bench_inv *data = (bench_inv*)arg;
7295

7396
for (i = 0; i < iters; i++) {
74-
secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
97+
secp256k1_scalar_negate(&data->scalar[0], &data->scalar[0]);
7598
}
7699
}
77100

@@ -80,7 +103,7 @@ void bench_scalar_sqr(void* arg, int iters) {
80103
bench_inv *data = (bench_inv*)arg;
81104

82105
for (i = 0; i < iters; i++) {
83-
secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
106+
secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
84107
}
85108
}
86109

@@ -89,7 +112,7 @@ void bench_scalar_mul(void* arg, int iters) {
89112
bench_inv *data = (bench_inv*)arg;
90113

91114
for (i = 0; i < iters; i++) {
92-
secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
115+
secp256k1_scalar_mul(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
93116
}
94117
}
95118

@@ -99,8 +122,8 @@ void bench_scalar_split(void* arg, int iters) {
99122
bench_inv *data = (bench_inv*)arg;
100123

101124
for (i = 0; i < iters; i++) {
102-
secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x);
103-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
125+
secp256k1_scalar_split_lambda(&data->scalar[0], &data->scalar[1], &data->scalar[0]);
126+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
104127
}
105128
CHECK(j <= iters);
106129
}
@@ -111,8 +134,8 @@ void bench_scalar_inverse(void* arg, int iters) {
111134
bench_inv *data = (bench_inv*)arg;
112135

113136
for (i = 0; i < iters; i++) {
114-
secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
115-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
137+
secp256k1_scalar_inverse(&data->scalar[0], &data->scalar[0]);
138+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
116139
}
117140
CHECK(j <= iters);
118141
}
@@ -122,8 +145,8 @@ void bench_scalar_inverse_var(void* arg, int iters) {
122145
bench_inv *data = (bench_inv*)arg;
123146

124147
for (i = 0; i < iters; i++) {
125-
secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
126-
j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
148+
secp256k1_scalar_inverse_var(&data->scalar[0], &data->scalar[0]);
149+
j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
127150
}
128151
CHECK(j <= iters);
129152
}
@@ -133,7 +156,7 @@ void bench_field_normalize(void* arg, int iters) {
133156
bench_inv *data = (bench_inv*)arg;
134157

135158
for (i = 0; i < iters; i++) {
136-
secp256k1_fe_normalize(&data->fe_x);
159+
secp256k1_fe_normalize(&data->fe[0]);
137160
}
138161
}
139162

@@ -142,7 +165,7 @@ void bench_field_normalize_weak(void* arg, int iters) {
142165
bench_inv *data = (bench_inv*)arg;
143166

144167
for (i = 0; i < iters; i++) {
145-
secp256k1_fe_normalize_weak(&data->fe_x);
168+
secp256k1_fe_normalize_weak(&data->fe[0]);
146169
}
147170
}
148171

@@ -151,7 +174,7 @@ void bench_field_mul(void* arg, int iters) {
151174
bench_inv *data = (bench_inv*)arg;
152175

153176
for (i = 0; i < iters; i++) {
154-
secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
177+
secp256k1_fe_mul(&data->fe[0], &data->fe[0], &data->fe[1]);
155178
}
156179
}
157180

@@ -160,7 +183,7 @@ void bench_field_sqr(void* arg, int iters) {
160183
bench_inv *data = (bench_inv*)arg;
161184

162185
for (i = 0; i < iters; i++) {
163-
secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
186+
secp256k1_fe_sqr(&data->fe[0], &data->fe[0]);
164187
}
165188
}
166189

@@ -169,8 +192,8 @@ void bench_field_inverse(void* arg, int iters) {
169192
bench_inv *data = (bench_inv*)arg;
170193

171194
for (i = 0; i < iters; i++) {
172-
secp256k1_fe_inv(&data->fe_x, &data->fe_x);
173-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
195+
secp256k1_fe_inv(&data->fe[0], &data->fe[0]);
196+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
174197
}
175198
}
176199

@@ -179,8 +202,8 @@ void bench_field_inverse_var(void* arg, int iters) {
179202
bench_inv *data = (bench_inv*)arg;
180203

181204
for (i = 0; i < iters; i++) {
182-
secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
183-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
205+
secp256k1_fe_inv_var(&data->fe[0], &data->fe[0]);
206+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
184207
}
185208
}
186209

@@ -190,9 +213,9 @@ void bench_field_sqrt(void* arg, int iters) {
190213
secp256k1_fe t;
191214

192215
for (i = 0; i < iters; i++) {
193-
t = data->fe_x;
194-
j += secp256k1_fe_sqrt(&data->fe_x, &t);
195-
secp256k1_fe_add(&data->fe_x, &data->fe_y);
216+
t = data->fe[0];
217+
j += secp256k1_fe_sqrt(&data->fe[0], &t);
218+
secp256k1_fe_add(&data->fe[0], &data->fe[1]);
196219
}
197220
CHECK(j <= iters);
198221
}
@@ -202,7 +225,7 @@ void bench_group_double_var(void* arg, int iters) {
202225
bench_inv *data = (bench_inv*)arg;
203226

204227
for (i = 0; i < iters; i++) {
205-
secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
228+
secp256k1_gej_double_var(&data->gej[0], &data->gej[0], NULL);
206229
}
207230
}
208231

@@ -211,7 +234,7 @@ void bench_group_add_var(void* arg, int iters) {
211234
bench_inv *data = (bench_inv*)arg;
212235

213236
for (i = 0; i < iters; i++) {
214-
secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
237+
secp256k1_gej_add_var(&data->gej[0], &data->gej[0], &data->gej[1], NULL);
215238
}
216239
}
217240

@@ -220,7 +243,7 @@ void bench_group_add_affine(void* arg, int iters) {
220243
bench_inv *data = (bench_inv*)arg;
221244

222245
for (i = 0; i < iters; i++) {
223-
secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
246+
secp256k1_gej_add_ge(&data->gej[0], &data->gej[0], &data->ge[1]);
224247
}
225248
}
226249

@@ -229,7 +252,7 @@ void bench_group_add_affine_var(void* arg, int iters) {
229252
bench_inv *data = (bench_inv*)arg;
230253

231254
for (i = 0; i < iters; i++) {
232-
secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
255+
secp256k1_gej_add_ge_var(&data->gej[0], &data->gej[0], &data->ge[1], NULL);
233256
}
234257
}
235258

@@ -238,18 +261,46 @@ void bench_group_jacobi_var(void* arg, int iters) {
238261
bench_inv *data = (bench_inv*)arg;
239262

240263
for (i = 0; i < iters; i++) {
241-
j += secp256k1_gej_has_quad_y_var(&data->gej_x);
264+
j += secp256k1_gej_has_quad_y_var(&data->gej[0]);
265+
/* Vary the Y and Z coordinates of the input (the X coordinate doesn't matter to
266+
secp256k1_gej_has_quad_y_var). Note that the resulting coordinates will
267+
generally not correspond to a point on the curve, but this is not a problem
268+
for the code being benchmarked here. Adding and normalizing have less
269+
overhead than EC operations (which could guarantee the point remains on the
270+
curve). */
271+
secp256k1_fe_add(&data->gej[0].y, &data->fe[1]);
272+
secp256k1_fe_add(&data->gej[0].z, &data->fe[2]);
273+
secp256k1_fe_normalize_var(&data->gej[0].y);
274+
secp256k1_fe_normalize_var(&data->gej[0].z);
275+
}
276+
CHECK(j <= iters);
277+
}
278+
279+
void bench_group_to_affine_var(void* arg, int iters) {
280+
int i;
281+
bench_inv *data = (bench_inv*)arg;
282+
283+
for (i = 0; i < iters; ++i) {
284+
secp256k1_ge_set_gej_var(&data->ge[1], &data->gej[0]);
285+
/* Use the output affine X/Y coordinates to vary the input X/Y/Z coordinates.
286+
Similar to bench_group_jacobi_var, this approach does not result in
287+
coordinates of points on the curve. */
288+
secp256k1_fe_add(&data->gej[0].x, &data->ge[1].y);
289+
secp256k1_fe_add(&data->gej[0].y, &data->fe[2]);
290+
secp256k1_fe_add(&data->gej[0].z, &data->ge[1].x);
291+
secp256k1_fe_normalize_var(&data->gej[0].x);
292+
secp256k1_fe_normalize_var(&data->gej[0].y);
293+
secp256k1_fe_normalize_var(&data->gej[0].z);
242294
}
243-
CHECK(j == iters);
244295
}
245296

246297
void bench_ecmult_wnaf(void* arg, int iters) {
247298
int i, bits = 0, overflow = 0;
248299
bench_inv *data = (bench_inv*)arg;
249300

250301
for (i = 0; i < iters; i++) {
251-
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
252-
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
302+
bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar[0], WINDOW_A);
303+
overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
253304
}
254305
CHECK(overflow >= 0);
255306
CHECK(bits <= 256*iters);
@@ -260,8 +311,8 @@ void bench_wnaf_const(void* arg, int iters) {
260311
bench_inv *data = (bench_inv*)arg;
261312

262313
for (i = 0; i < iters; i++) {
263-
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
264-
overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
314+
bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
315+
overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
265316
}
266317
CHECK(overflow >= 0);
267318
CHECK(bits <= 256*iters);
@@ -323,14 +374,15 @@ void bench_context_sign(void* arg, int iters) {
323374
void bench_num_jacobi(void* arg, int iters) {
324375
int i, j = 0;
325376
bench_inv *data = (bench_inv*)arg;
326-
secp256k1_num nx, norder;
377+
secp256k1_num nx, na, norder;
327378

328-
secp256k1_scalar_get_num(&nx, &data->scalar_x);
379+
secp256k1_scalar_get_num(&nx, &data->scalar[0]);
329380
secp256k1_scalar_order_get_num(&norder);
330-
secp256k1_scalar_get_num(&norder, &data->scalar_y);
381+
secp256k1_scalar_get_num(&na, &data->scalar[1]);
331382

332383
for (i = 0; i < iters; i++) {
333384
j += secp256k1_num_jacobi(&nx, &norder);
385+
secp256k1_num_add(&nx, &nx, &na);
334386
}
335387
CHECK(j <= iters);
336388
}
@@ -363,6 +415,7 @@ int main(int argc, char **argv) {
363415
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
364416
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
365417
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters);
418+
if (have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
366419

367420
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
368421
if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);

0 commit comments

Comments
 (0)