diff --git a/src/secp256k1/src/bench_internal.c b/src/secp256k1/src/bench_internal.c --- a/src/secp256k1/src/bench_internal.c +++ b/src/secp256k1/src/bench_internal.c @@ -99,15 +99,6 @@ } } -void bench_scalar_sqr(void* arg, int iters) { - int i; - bench_inv *data = (bench_inv*)arg; - - for (i = 0; i < iters; i++) { - secp256k1_scalar_sqr(&data->scalar[0], &data->scalar[0]); - } -} - void bench_scalar_mul(void* arg, int iters) { int i; bench_inv *data = (bench_inv*)arg; @@ -393,7 +384,6 @@ if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100); if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100); - if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, iters*10); if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10); if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "split")) run_benchmark("scalar_split", bench_scalar_split, bench_setup, NULL, &data, 10, iters); if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, 2000); diff --git a/src/secp256k1/src/scalar.h b/src/secp256k1/src/scalar.h --- a/src/secp256k1/src/scalar.h +++ b/src/secp256k1/src/scalar.h @@ -63,9 +63,6 @@ * the low bits that were shifted off */ static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n); -/** Compute the square of a scalar (modulo the group order). */ -static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a); - /** Compute the inverse of a scalar (modulo the group order). */ static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a); diff --git a/src/secp256k1/src/scalar_4x64_impl.h b/src/secp256k1/src/scalar_4x64_impl.h --- a/src/secp256k1/src/scalar_4x64_impl.h +++ b/src/secp256k1/src/scalar_4x64_impl.h @@ -214,28 +214,6 @@ VERIFY_CHECK(c1 >= th); \ } -/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ -#define muladd2(a,b) { \ - uint64_t tl, th, th2, tl2; \ - { \ - uint128_t t = (uint128_t)a * b; \ - th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ - tl = t; \ - } \ - th2 = th + th; /* at most 0xFFFFFFFFFFFFFFFE (in case th was 0x7FFFFFFFFFFFFFFF) */ \ - c2 += (th2 < th); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((th2 >= th) || (c2 != 0)); \ - tl2 = tl + tl; /* at most 0xFFFFFFFFFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFFFFFFFFFF) */ \ - th2 += (tl2 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \ - c0 += tl2; /* overflow is handled on the next line */ \ - th2 += (c0 < tl2); /* second overflow is handled on the next line */ \ - c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \ - c1 += th2; /* overflow is handled on the next line */ \ - c2 += (c1 < th2); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \ -} - /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */ #define sumadd(a) { \ unsigned int over; \ @@ -745,148 +723,10 @@ #endif } -static void secp256k1_scalar_sqr_512(uint64_t l[8], const secp256k1_scalar *a) { -#ifdef USE_ASM_X86_64 - __asm__ __volatile__( - /* Preload */ - "movq 0(%%rdi), %%r11\n" - "movq 8(%%rdi), %%r12\n" - "movq 16(%%rdi), %%r13\n" - "movq 24(%%rdi), %%r14\n" - /* (rax,rdx) = a0 * a0 */ - "movq %%r11, %%rax\n" - "mulq %%r11\n" - /* Extract l0 */ - "movq %%rax, 0(%%rsi)\n" - /* (r8,r9,r10) = (rdx,0) */ - "movq %%rdx, %%r8\n" - "xorq %%r9, %%r9\n" - "xorq %%r10, %%r10\n" - /* (r8,r9,r10) += 2 * a0 * a1 */ - "movq %%r11, %%rax\n" - "mulq %%r12\n" - "addq %%rax, %%r8\n" - "adcq %%rdx, %%r9\n" - "adcq $0, %%r10\n" - "addq %%rax, %%r8\n" - "adcq %%rdx, %%r9\n" - "adcq $0, %%r10\n" - /* Extract l1 */ - "movq %%r8, 8(%%rsi)\n" - "xorq %%r8, %%r8\n" - /* (r9,r10,r8) += 2 * a0 * a2 */ - "movq %%r11, %%rax\n" - "mulq %%r13\n" - "addq %%rax, %%r9\n" - "adcq %%rdx, %%r10\n" - "adcq $0, %%r8\n" - "addq %%rax, %%r9\n" - "adcq %%rdx, %%r10\n" - "adcq $0, %%r8\n" - /* (r9,r10,r8) += a1 * a1 */ - "movq %%r12, %%rax\n" - "mulq %%r12\n" - "addq %%rax, %%r9\n" - "adcq %%rdx, %%r10\n" - "adcq $0, %%r8\n" - /* Extract l2 */ - "movq %%r9, 16(%%rsi)\n" - "xorq %%r9, %%r9\n" - /* (r10,r8,r9) += 2 * a0 * a3 */ - "movq %%r11, %%rax\n" - "mulq %%r14\n" - "addq %%rax, %%r10\n" - "adcq %%rdx, %%r8\n" - "adcq $0, %%r9\n" - "addq %%rax, %%r10\n" - "adcq %%rdx, %%r8\n" - "adcq $0, %%r9\n" - /* (r10,r8,r9) += 2 * a1 * a2 */ - "movq %%r12, %%rax\n" - "mulq %%r13\n" - "addq %%rax, %%r10\n" - "adcq %%rdx, %%r8\n" - "adcq $0, %%r9\n" - "addq %%rax, %%r10\n" - "adcq %%rdx, %%r8\n" - "adcq $0, %%r9\n" - /* Extract l3 */ - "movq %%r10, 24(%%rsi)\n" - "xorq %%r10, %%r10\n" - /* (r8,r9,r10) += 2 * a1 * a3 */ - "movq %%r12, %%rax\n" - "mulq %%r14\n" - "addq %%rax, %%r8\n" - "adcq %%rdx, %%r9\n" - "adcq $0, %%r10\n" - "addq %%rax, %%r8\n" - "adcq %%rdx, %%r9\n" - "adcq $0, %%r10\n" - /* (r8,r9,r10) += a2 * a2 */ - "movq %%r13, %%rax\n" - "mulq %%r13\n" - "addq %%rax, %%r8\n" - "adcq %%rdx, %%r9\n" - "adcq $0, %%r10\n" - /* Extract l4 */ - "movq %%r8, 32(%%rsi)\n" - "xorq %%r8, %%r8\n" - /* (r9,r10,r8) += 2 * a2 * a3 */ - "movq %%r13, %%rax\n" - "mulq %%r14\n" - "addq %%rax, %%r9\n" - "adcq %%rdx, %%r10\n" - "adcq $0, %%r8\n" - "addq %%rax, %%r9\n" - "adcq %%rdx, %%r10\n" - "adcq $0, %%r8\n" - /* Extract l5 */ - "movq %%r9, 40(%%rsi)\n" - /* (r10,r8) += a3 * a3 */ - "movq %%r14, %%rax\n" - "mulq %%r14\n" - "addq %%rax, %%r10\n" - "adcq %%rdx, %%r8\n" - /* Extract l6 */ - "movq %%r10, 48(%%rsi)\n" - /* Extract l7 */ - "movq %%r8, 56(%%rsi)\n" - : - : "S"(l), "D"(a->d) - : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc", "memory"); -#else - /* 160 bit accumulator. */ - uint64_t c0 = 0, c1 = 0; - uint32_t c2 = 0; - - /* l[0..7] = a[0..3] * b[0..3]. */ - muladd_fast(a->d[0], a->d[0]); - extract_fast(l[0]); - muladd2(a->d[0], a->d[1]); - extract(l[1]); - muladd2(a->d[0], a->d[2]); - muladd(a->d[1], a->d[1]); - extract(l[2]); - muladd2(a->d[0], a->d[3]); - muladd2(a->d[1], a->d[2]); - extract(l[3]); - muladd2(a->d[1], a->d[3]); - muladd(a->d[2], a->d[2]); - extract(l[4]); - muladd2(a->d[2], a->d[3]); - extract(l[5]); - muladd_fast(a->d[3], a->d[3]); - extract_fast(l[6]); - VERIFY_CHECK(c1 == 0); - l[7] = c0; -#endif -} - #undef sumadd #undef sumadd_fast #undef muladd #undef muladd_fast -#undef muladd2 #undef extract #undef extract_fast @@ -908,12 +748,6 @@ return ret; } -static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) { - uint64_t l[8]; - secp256k1_scalar_sqr_512(l, a); - secp256k1_scalar_reduce_512(r, l); -} - static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) { r1->d[0] = k->d[0]; r1->d[1] = k->d[1]; diff --git a/src/secp256k1/src/scalar_8x32_impl.h b/src/secp256k1/src/scalar_8x32_impl.h --- a/src/secp256k1/src/scalar_8x32_impl.h +++ b/src/secp256k1/src/scalar_8x32_impl.h @@ -293,28 +293,6 @@ VERIFY_CHECK(c1 >= th); \ } -/** Add 2*a*b to the number defined by (c0,c1,c2). c2 must never overflow. */ -#define muladd2(a,b) { \ - uint32_t tl, th, th2, tl2; \ - { \ - uint64_t t = (uint64_t)a * b; \ - th = t >> 32; /* at most 0xFFFFFFFE */ \ - tl = t; \ - } \ - th2 = th + th; /* at most 0xFFFFFFFE (in case th was 0x7FFFFFFF) */ \ - c2 += (th2 < th); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((th2 >= th) || (c2 != 0)); \ - tl2 = tl + tl; /* at most 0xFFFFFFFE (in case the lowest 63 bits of tl were 0x7FFFFFFF) */ \ - th2 += (tl2 < tl); /* at most 0xFFFFFFFF */ \ - c0 += tl2; /* overflow is handled on the next line */ \ - th2 += (c0 < tl2); /* second overflow is handled on the next line */ \ - c2 += (c0 < tl2) & (th2 == 0); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((c0 >= tl2) || (th2 != 0) || (c2 != 0)); \ - c1 += th2; /* overflow is handled on the next line */ \ - c2 += (c1 < th2); /* never overflows by contract (verified the next line) */ \ - VERIFY_CHECK((c1 >= th2) || (c2 != 0)); \ -} - /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */ #define sumadd(a) { \ unsigned int over; \ @@ -578,71 +556,10 @@ l[15] = c0; } -static void secp256k1_scalar_sqr_512(uint32_t *l, const secp256k1_scalar *a) { - /* 96 bit accumulator. */ - uint32_t c0 = 0, c1 = 0, c2 = 0; - - /* l[0..15] = a[0..7]^2. */ - muladd_fast(a->d[0], a->d[0]); - extract_fast(l[0]); - muladd2(a->d[0], a->d[1]); - extract(l[1]); - muladd2(a->d[0], a->d[2]); - muladd(a->d[1], a->d[1]); - extract(l[2]); - muladd2(a->d[0], a->d[3]); - muladd2(a->d[1], a->d[2]); - extract(l[3]); - muladd2(a->d[0], a->d[4]); - muladd2(a->d[1], a->d[3]); - muladd(a->d[2], a->d[2]); - extract(l[4]); - muladd2(a->d[0], a->d[5]); - muladd2(a->d[1], a->d[4]); - muladd2(a->d[2], a->d[3]); - extract(l[5]); - muladd2(a->d[0], a->d[6]); - muladd2(a->d[1], a->d[5]); - muladd2(a->d[2], a->d[4]); - muladd(a->d[3], a->d[3]); - extract(l[6]); - muladd2(a->d[0], a->d[7]); - muladd2(a->d[1], a->d[6]); - muladd2(a->d[2], a->d[5]); - muladd2(a->d[3], a->d[4]); - extract(l[7]); - muladd2(a->d[1], a->d[7]); - muladd2(a->d[2], a->d[6]); - muladd2(a->d[3], a->d[5]); - muladd(a->d[4], a->d[4]); - extract(l[8]); - muladd2(a->d[2], a->d[7]); - muladd2(a->d[3], a->d[6]); - muladd2(a->d[4], a->d[5]); - extract(l[9]); - muladd2(a->d[3], a->d[7]); - muladd2(a->d[4], a->d[6]); - muladd(a->d[5], a->d[5]); - extract(l[10]); - muladd2(a->d[4], a->d[7]); - muladd2(a->d[5], a->d[6]); - extract(l[11]); - muladd2(a->d[5], a->d[7]); - muladd(a->d[6], a->d[6]); - extract(l[12]); - muladd2(a->d[6], a->d[7]); - extract(l[13]); - muladd_fast(a->d[7], a->d[7]); - extract_fast(l[14]); - VERIFY_CHECK(c1 == 0); - l[15] = c0; -} - #undef sumadd #undef sumadd_fast #undef muladd #undef muladd_fast -#undef muladd2 #undef extract #undef extract_fast @@ -668,12 +585,6 @@ return ret; } -static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) { - uint32_t l[16]; - secp256k1_scalar_sqr_512(l, a); - secp256k1_scalar_reduce_512(r, l); -} - static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) { r1->d[0] = k->d[0]; r1->d[1] = k->d[1]; diff --git a/src/secp256k1/src/scalar_low_impl.h b/src/secp256k1/src/scalar_low_impl.h --- a/src/secp256k1/src/scalar_low_impl.h +++ b/src/secp256k1/src/scalar_low_impl.h @@ -104,10 +104,6 @@ return ret; } -static void secp256k1_scalar_sqr(secp256k1_scalar *r, const secp256k1_scalar *a) { - *r = (*a * *a) % EXHAUSTIVE_TEST_ORDER; -} - static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) { *r1 = *a; *r2 = 0; diff --git a/src/secp256k1/src/tests.c b/src/secp256k1/src/tests.c --- a/src/secp256k1/src/tests.c +++ b/src/secp256k1/src/tests.c @@ -793,7 +793,7 @@ /** test with secp group order as order */ secp256k1_scalar_order_get_num(&order); random_scalar_order_test(&sqr); - secp256k1_scalar_sqr(&sqr, &sqr); + secp256k1_scalar_mul(&sqr, &sqr, &sqr); /* test residue */ secp256k1_scalar_get_num(&n, &sqr); CHECK(secp256k1_num_jacobi(&n, &order) == 1); @@ -1488,14 +1488,6 @@ CHECK(secp256k1_scalar_eq(&r1, &r2)); } - { - /* Test square. */ - secp256k1_scalar r1, r2; - secp256k1_scalar_sqr(&r1, &s1); - secp256k1_scalar_mul(&r2, &s1, &s1); - CHECK(secp256k1_scalar_eq(&r1, &r2)); - } - { /* Test multiplicative identity. */ secp256k1_scalar r1, v1; @@ -2187,12 +2179,6 @@ CHECK(!secp256k1_scalar_check_overflow(&zz)); CHECK(secp256k1_scalar_eq(&one, &zz)); } - secp256k1_scalar_mul(&z, &x, &x); - CHECK(!secp256k1_scalar_check_overflow(&z)); - secp256k1_scalar_sqr(&zz, &x); - CHECK(!secp256k1_scalar_check_overflow(&zz)); - CHECK(secp256k1_scalar_eq(&zz, &z)); - CHECK(secp256k1_scalar_eq(&r2, &zz)); } } }