diff --git a/src/secp256k1/src/bench_internal.c b/src/secp256k1/src/bench_internal.c
--- a/src/secp256k1/src/bench_internal.c
+++ b/src/secp256k1/src/bench_internal.c
@@ -19,10 +19,10 @@
 #include "secp256k1.c"
 
 typedef struct {
-    secp256k1_scalar scalar_x, scalar_y;
-    secp256k1_fe fe_x, fe_y;
-    secp256k1_ge ge_x, ge_y;
-    secp256k1_gej gej_x, gej_y;
+    secp256k1_scalar scalar[2];
+    secp256k1_fe fe[4];
+    secp256k1_ge ge[2];
+    secp256k1_gej gej[2];
     unsigned char data[64];
     int wnaf[256];
 } bench_inv;
@@ -30,30 +30,53 @@
 void bench_setup(void* arg) {
     bench_inv *data = (bench_inv*)arg;
 
-    static const unsigned char init_x[32] = {
-        0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
-        0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
-        0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
-        0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
+    static const unsigned char init[4][32] = {
+        /* Initializer for scalar[0], fe[0], first half of data, the X coordinate of ge[0],
+           and the (implied affine) X coordinate of gej[0]. */
+        {
+            0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
+            0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
+            0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
+            0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
+        },
+        /* Initializer for scalar[1], fe[1], second half of data, the X coordinate of ge[1],
+           and the (implied affine) X coordinate of gej[1]. */
+        {
+            0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
+            0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
+            0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
+            0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
+        },
+        /* Initializer for fe[2] and the Z coordinate of gej[0]. */
+        {
+            0x3d, 0x2d, 0xef, 0xf4, 0x25, 0x98, 0x4f, 0x5d,
+            0xe2, 0xca, 0x5f, 0x41, 0x3f, 0x3f, 0xce, 0x44,
+            0xaa, 0x2c, 0x53, 0x8a, 0xc6, 0x59, 0x1f, 0x38,
+            0x38, 0x23, 0xe4, 0x11, 0x27, 0xc6, 0xa0, 0xe7
+        },
+        /* Initializer for fe[3] and the Z coordinate of gej[1]. */
+        {
+            0xbd, 0x21, 0xa5, 0xe1, 0x13, 0x50, 0x73, 0x2e,
+            0x52, 0x98, 0xc8, 0x9e, 0xab, 0x00, 0xa2, 0x68,
+            0x43, 0xf5, 0xd7, 0x49, 0x80, 0x72, 0xa7, 0xf3,
+            0xd7, 0x60, 0xe6, 0xab, 0x90, 0x92, 0xdf, 0xc5
+        }
     };
 
-    static const unsigned char init_y[32] = {
-        0x82, 0x83, 0x85, 0x87, 0x8b, 0x8d, 0x81, 0x83,
-        0x97, 0xad, 0xaf, 0xb5, 0xb9, 0xbb, 0xbf, 0xc5,
-        0xdb, 0xdd, 0xe3, 0xe7, 0xe9, 0xef, 0xf3, 0xf9,
-        0x11, 0x15, 0x17, 0x1b, 0x1d, 0xb1, 0xbf, 0xd3
-    };
-
-    secp256k1_scalar_set_b32(&data->scalar_x, init_x, NULL);
-    secp256k1_scalar_set_b32(&data->scalar_y, init_y, NULL);
-    secp256k1_fe_set_b32(&data->fe_x, init_x);
-    secp256k1_fe_set_b32(&data->fe_y, init_y);
-    CHECK(secp256k1_ge_set_xo_var(&data->ge_x, &data->fe_x, 0));
-    CHECK(secp256k1_ge_set_xo_var(&data->ge_y, &data->fe_y, 1));
-    secp256k1_gej_set_ge(&data->gej_x, &data->ge_x);
-    secp256k1_gej_set_ge(&data->gej_y, &data->ge_y);
-    memcpy(data->data, init_x, 32);
-    memcpy(data->data + 32, init_y, 32);
+    secp256k1_scalar_set_b32(&data->scalar[0], init[0], NULL);
+    secp256k1_scalar_set_b32(&data->scalar[1], init[1], NULL);
+    secp256k1_fe_set_b32(&data->fe[0], init[0]);
+    secp256k1_fe_set_b32(&data->fe[1], init[1]);
+    secp256k1_fe_set_b32(&data->fe[2], init[2]);
+    secp256k1_fe_set_b32(&data->fe[3], init[3]);
+    CHECK(secp256k1_ge_set_xo_var(&data->ge[0], &data->fe[0], 0));
+    CHECK(secp256k1_ge_set_xo_var(&data->ge[1], &data->fe[1], 1));
+    secp256k1_gej_set_ge(&data->gej[0], &data->ge[0]);
+    secp256k1_gej_rescale(&data->gej[0], &data->fe[2]);
+    secp256k1_gej_set_ge(&data->gej[1], &data->ge[1]);
+    secp256k1_gej_rescale(&data->gej[1], &data->fe[3]);
+    memcpy(data->data, init[0], 32);
+    memcpy(data->data + 32, init[1], 32);
 }
 
 void bench_scalar_add(void* arg, int iters) {
@@ -61,7 +84,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
     CHECK(j <= iters);
 }
@@ -71,7 +94,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
+        secp256k1_scalar_negate(&data->scalar[0], &data->scalar[0]);
     }
 }
 
@@ -80,7 +103,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
+        secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]);
     }
 }
 
@@ -89,7 +112,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        secp256k1_scalar_mul(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
 }
 
@@ -99,8 +122,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_split_lambda(&data->scalar_x, &data->scalar_y, &data->scalar_x);
-        j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        secp256k1_scalar_split_lambda(&data->scalar[0], &data->scalar[1], &data->scalar[0]);
+        j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
     CHECK(j <= iters);
 }
@@ -111,8 +134,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
-        j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        secp256k1_scalar_inverse(&data->scalar[0], &data->scalar[0]);
+        j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
    }
     CHECK(j <= iters);
 }
@@ -122,8 +145,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
-        j += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        secp256k1_scalar_inverse_var(&data->scalar[0], &data->scalar[0]);
+        j += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
     CHECK(j <= iters);
 }
@@ -133,7 +156,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_normalize(&data->fe_x);
+        secp256k1_fe_normalize(&data->fe[0]);
     }
 }
 
@@ -142,7 +165,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_normalize_weak(&data->fe_x);
+        secp256k1_fe_normalize_weak(&data->fe[0]);
     }
 }
 
@@ -151,7 +174,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
+        secp256k1_fe_mul(&data->fe[0], &data->fe[0], &data->fe[1]);
     }
 }
 
@@ -160,7 +183,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
+        secp256k1_fe_sqr(&data->fe[0], &data->fe[0]);
     }
 }
 
@@ -169,8 +192,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_inv(&data->fe_x, &data->fe_x);
-        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+        secp256k1_fe_inv(&data->fe[0], &data->fe[0]);
+        secp256k1_fe_add(&data->fe[0], &data->fe[1]);
     }
 }
 
@@ -179,8 +202,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
-        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+        secp256k1_fe_inv_var(&data->fe[0], &data->fe[0]);
+        secp256k1_fe_add(&data->fe[0], &data->fe[1]);
     }
 }
 
@@ -190,9 +213,9 @@
     secp256k1_fe t;
 
     for (i = 0; i < iters; i++) {
-        t = data->fe_x;
-        j += secp256k1_fe_sqrt(&data->fe_x, &t);
-        secp256k1_fe_add(&data->fe_x, &data->fe_y);
+        t = data->fe[0];
+        j += secp256k1_fe_sqrt(&data->fe[0], &t);
+        secp256k1_fe_add(&data->fe[0], &data->fe[1]);
     }
     CHECK(j <= iters);
 }
@@ -202,7 +225,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
+        secp256k1_gej_double_var(&data->gej[0], &data->gej[0], NULL);
     }
 }
 
@@ -211,7 +234,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
+        secp256k1_gej_add_var(&data->gej[0], &data->gej[0], &data->gej[1], NULL);
     }
 }
 
@@ -220,7 +243,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
+        secp256k1_gej_add_ge(&data->gej[0], &data->gej[0], &data->ge[1]);
     }
 }
 
@@ -229,7 +252,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
+        secp256k1_gej_add_ge_var(&data->gej[0], &data->gej[0], &data->ge[1], NULL);
     }
 }
 
@@ -238,9 +261,37 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        j += secp256k1_gej_has_quad_y_var(&data->gej_x);
+        j += secp256k1_gej_has_quad_y_var(&data->gej[0]);
+        /* Vary the Y and Z coordinates of the input (the X coordinate doesn't matter to
+           secp256k1_gej_has_quad_y_var). Note that the resulting coordinates will
+           generally not correspond to a point on the curve, but this is not a problem
+           for the code being benchmarked here. Adding and normalizing have less
+           overhead than EC operations (which could guarantee the point remains on the
+           curve). */
+        secp256k1_fe_add(&data->gej[0].y, &data->fe[1]);
+        secp256k1_fe_add(&data->gej[0].z, &data->fe[2]);
+        secp256k1_fe_normalize_var(&data->gej[0].y);
+        secp256k1_fe_normalize_var(&data->gej[0].z);
+    }
+    CHECK(j <= iters);
+}
+
+void bench_group_to_affine_var(void* arg, int iters) {
+    int i;
+    bench_inv *data = (bench_inv*)arg;
+
+    for (i = 0; i < iters; ++i) {
+        secp256k1_ge_set_gej_var(&data->ge[1], &data->gej[0]);
+        /* Use the output affine X/Y coordinates to vary the input X/Y/Z coordinates.
+           Similar to bench_group_jacobi_var, this approach does not result in
+           coordinates of points on the curve. */
+        secp256k1_fe_add(&data->gej[0].x, &data->ge[1].y);
+        secp256k1_fe_add(&data->gej[0].y, &data->fe[2]);
+        secp256k1_fe_add(&data->gej[0].z, &data->ge[1].x);
+        secp256k1_fe_normalize_var(&data->gej[0].x);
+        secp256k1_fe_normalize_var(&data->gej[0].y);
+        secp256k1_fe_normalize_var(&data->gej[0].z);
     }
-    CHECK(j == iters);
 }
 
 void bench_ecmult_wnaf(void* arg, int iters) {
@@ -248,8 +299,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
-        overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        bits += secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar[0], WINDOW_A);
+        overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
     CHECK(overflow >= 0);
     CHECK(bits <= 256*iters);
@@ -260,8 +311,8 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < iters; i++) {
-        bits += secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A, 256);
-        overflow += secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+        bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
+        overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
     }
     CHECK(overflow >= 0);
     CHECK(bits <= 256*iters);
@@ -323,14 +374,15 @@
 void bench_num_jacobi(void* arg, int iters) {
     int i, j = 0;
     bench_inv *data = (bench_inv*)arg;
-    secp256k1_num nx, norder;
+    secp256k1_num nx, na, norder;
 
-    secp256k1_scalar_get_num(&nx, &data->scalar_x);
+    secp256k1_scalar_get_num(&nx, &data->scalar[0]);
     secp256k1_scalar_order_get_num(&norder);
-    secp256k1_scalar_get_num(&norder, &data->scalar_y);
+    secp256k1_scalar_get_num(&na, &data->scalar[1]);
 
     for (i = 0; i < iters; i++) {
         j += secp256k1_num_jacobi(&nx, &norder);
+        secp256k1_num_add(&nx, &nx, &na);
     }
     CHECK(j <= iters);
 }
@@ -363,6 +415,7 @@
     if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, iters*10);
     if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, iters*10);
     if (have_flag(argc, argv, "group") || have_flag(argc, argv, "jacobi")) run_benchmark("group_jacobi_var", bench_group_jacobi_var, bench_setup, NULL, &data, 10, iters);
+    if (have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
 
     if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
     if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);
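
Background note (not part of the diff): the secp256k1_gej_rescale calls added to bench_setup give the benchmark inputs a non-trivial Z coordinate without changing the points they represent. In the Jacobian convention used by libsecp256k1, a triple (X, Y, Z) denotes the affine point (X/Z^2, Y/Z^3), so replacing it with (X*s^2, Y*s^3, Z*s) for any nonzero s is a no-op on the underlying point. The standalone sketch below is only an illustration of that identity: it is not libsecp256k1 code, it works over a small toy prime with plain 64-bit integers, and the helper names (mulmod, invmod) and the concrete values are made up for the example.

/* Toy illustration: Jacobian rescaling (X, Y, Z) -> (X*s^2, Y*s^3, Z*s)
 * leaves the represented affine point (X/Z^2, Y/Z^3) unchanged.
 * Demonstrated over the small prime P = 1000003 instead of the real field. */
#include <stdint.h>
#include <stdio.h>

static const int64_t P = 1000003;

static int64_t mulmod(int64_t a, int64_t b) { return (a * b) % P; }

/* Modular inverse via Fermat's little theorem: a^(P-2) mod P (P is prime). */
static int64_t invmod(int64_t a) {
    int64_t r = 1, e = P - 2;
    while (e > 0) {
        if (e & 1) r = mulmod(r, a);
        a = mulmod(a, a);
        e >>= 1;
    }
    return r;
}

int main(void) {
    /* An arbitrary Jacobian triple and a rescaling factor (values are made up). */
    int64_t x = 123456, y = 654321, z = 777, s = 424242;
    int64_t x2 = mulmod(x, mulmod(s, s));            /* X' = X*s^2 */
    int64_t y2 = mulmod(y, mulmod(s, mulmod(s, s))); /* Y' = Y*s^3 */
    int64_t z2 = mulmod(z, s);                       /* Z' = Z*s   */

    /* Both triples map to the same affine coordinates (X/Z^2, Y/Z^3). */
    int64_t ax  = mulmod(x,  invmod(mulmod(z,  z)));
    int64_t ay  = mulmod(y,  invmod(mulmod(z,  mulmod(z,  z))));
    int64_t ax2 = mulmod(x2, invmod(mulmod(z2, z2)));
    int64_t ay2 = mulmod(y2, invmod(mulmod(z2, mulmod(z2, z2))));

    printf("before rescale: (%lld, %lld)\n", (long long)ax,  (long long)ay);
    printf("after  rescale: (%lld, %lld)\n", (long long)ax2, (long long)ay2);
    return 0;
}

The same reasoning explains why the rescaled gej[0]/gej[1] in bench_setup remain valid curve points, so the group benchmarks now exercise the general (Z != 1) code path rather than the special case produced by secp256k1_gej_set_ge alone.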