Changeset View
Changeset View
Standalone View
Standalone View
src/secp256k1/src/ecmult_impl.h
/********************************************************************** | /***************************************************************************** | ||||
* Copyright (c) 2013, 2014 Pieter Wuille * | * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick * | ||||
* Distributed under the MIT software license, see the accompanying * | * Distributed under the MIT software license, see the accompanying * | ||||
* file COPYING or http://www.opensource.org/licenses/mit-license.php.* | * file COPYING or http://www.opensource.org/licenses/mit-license.php. * | ||||
**********************************************************************/ | *****************************************************************************/ | ||||
#ifndef SECP256K1_ECMULT_IMPL_H | #ifndef SECP256K1_ECMULT_IMPL_H | ||||
#define SECP256K1_ECMULT_IMPL_H | #define SECP256K1_ECMULT_IMPL_H | ||||
#include <string.h> | #include <string.h> | ||||
#include <stdint.h> | |||||
#include "group.h" | #include "group.h" | ||||
#include "scalar.h" | #include "scalar.h" | ||||
#include "ecmult.h" | #include "ecmult.h" | ||||
#if defined(EXHAUSTIVE_TEST_ORDER) | #if defined(EXHAUSTIVE_TEST_ORDER) | ||||
/* We need to lower these values for exhaustive tests because | /* We need to lower these values for exhaustive tests because | ||||
* the tables cannot have infinities in them (this breaks the | * the tables cannot have infinities in them (this breaks the | ||||
Show All 17 Lines | |||||
/** Two tables for window size 15: 1.375 MiB. */ | /** Two tables for window size 15: 1.375 MiB. */ | ||||
#define WINDOW_G 15 | #define WINDOW_G 15 | ||||
#else | #else | ||||
/** One table for window size 16: 1.375 MiB. */ | /** One table for window size 16: 1.375 MiB. */ | ||||
#define WINDOW_G 16 | #define WINDOW_G 16 | ||||
#endif | #endif | ||||
#endif | #endif | ||||
#ifdef USE_ENDOMORPHISM | |||||
#define WNAF_BITS 128 | |||||
#else | |||||
#define WNAF_BITS 256 | |||||
#endif | |||||
#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w)) | |||||
/** The number of entries a table with precomputed multiples needs to have. */ | /** The number of entries a table with precomputed multiples needs to have. */ | ||||
#define ECMULT_TABLE_SIZE(w) (1 << ((w)-2)) | #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2)) | ||||
/* The number of objects allocated on the scratch space for ecmult_multi algorithms */ | |||||
#define PIPPENGER_SCRATCH_OBJECTS 6 | |||||
#define STRAUSS_SCRATCH_OBJECTS 6 | |||||
#define PIPPENGER_MAX_BUCKET_WINDOW 12 | |||||
/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */ | |||||
#ifdef USE_ENDOMORPHISM | |||||
#define ECMULT_PIPPENGER_THRESHOLD 88 | |||||
#else | |||||
#define ECMULT_PIPPENGER_THRESHOLD 160 | |||||
#endif | |||||
#ifdef USE_ENDOMORPHISM | |||||
#define ECMULT_MAX_POINTS_PER_BATCH 5000000 | |||||
#else | |||||
#define ECMULT_MAX_POINTS_PER_BATCH 10000000 | |||||
#endif | |||||
/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain | /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain | ||||
* the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will | * the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will | ||||
* contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. | * contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z. | ||||
* Prej's Z values are undefined, except for the last value. | * Prej's Z values are undefined, except for the last value. | ||||
*/ | */ | ||||
static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) { | static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) { | ||||
secp256k1_gej d; | secp256k1_gej d; | ||||
secp256k1_ge a_ge, d_ge; | secp256k1_ge a_ge, d_ge; | ||||
▲ Show 20 Lines • Show All 223 Lines • ▼ Show 20 Lines | #ifdef VERIFY | ||||
CHECK(carry == 0); | CHECK(carry == 0); | ||||
while (bit < 256) { | while (bit < 256) { | ||||
CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0); | CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0); | ||||
} | } | ||||
#endif | #endif | ||||
return last_set_bit + 1; | return last_set_bit + 1; | ||||
} | } | ||||
static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { | struct secp256k1_strauss_point_state { | ||||
secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
secp256k1_ge tmpa; | |||||
secp256k1_fe Z; | |||||
#ifdef USE_ENDOMORPHISM | #ifdef USE_ENDOMORPHISM | ||||
secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
secp256k1_scalar na_1, na_lam; | secp256k1_scalar na_1, na_lam; | ||||
/* Splitted G factors. */ | |||||
secp256k1_scalar ng_1, ng_128; | |||||
int wnaf_na_1[130]; | int wnaf_na_1[130]; | ||||
int wnaf_na_lam[130]; | int wnaf_na_lam[130]; | ||||
int bits_na_1; | int bits_na_1; | ||||
int bits_na_lam; | int bits_na_lam; | ||||
int wnaf_ng_1[129]; | |||||
int bits_ng_1; | |||||
int wnaf_ng_128[129]; | |||||
int bits_ng_128; | |||||
#else | #else | ||||
int wnaf_na[256]; | int wnaf_na[256]; | ||||
int bits_na; | int bits_na; | ||||
#endif | |||||
size_t input_pos; | |||||
}; | |||||
struct secp256k1_strauss_state { | |||||
secp256k1_gej* prej; | |||||
secp256k1_fe* zr; | |||||
secp256k1_ge* pre_a; | |||||
#ifdef USE_ENDOMORPHISM | |||||
secp256k1_ge* pre_a_lam; | |||||
#endif | |||||
struct secp256k1_strauss_point_state* ps; | |||||
}; | |||||
static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, const struct secp256k1_strauss_state *state, secp256k1_gej *r, int num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { | |||||
secp256k1_ge tmpa; | |||||
secp256k1_fe Z; | |||||
#ifdef USE_ENDOMORPHISM | |||||
/* Splitted G factors. */ | |||||
secp256k1_scalar ng_1, ng_128; | |||||
int wnaf_ng_1[129]; | |||||
int bits_ng_1 = 0; | |||||
int wnaf_ng_128[129]; | |||||
int bits_ng_128 = 0; | |||||
#else | |||||
int wnaf_ng[256]; | int wnaf_ng[256]; | ||||
int bits_ng; | int bits_ng = 0; | ||||
#endif | #endif | ||||
int i; | int i; | ||||
int bits; | int bits = 0; | ||||
int np; | |||||
int no = 0; | |||||
for (np = 0; np < num; ++np) { | |||||
if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) { | |||||
continue; | |||||
} | |||||
state->ps[no].input_pos = np; | |||||
#ifdef USE_ENDOMORPHISM | #ifdef USE_ENDOMORPHISM | ||||
/* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ | /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */ | ||||
secp256k1_scalar_split_lambda(&na_1, &na_lam, na); | secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]); | ||||
/* build wnaf representation for na_1 and na_lam. */ | /* build wnaf representation for na_1 and na_lam. */ | ||||
bits_na_1 = secp256k1_ecmult_wnaf(wnaf_na_1, 130, &na_1, WINDOW_A); | state->ps[no].bits_na_1 = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1, 130, &state->ps[no].na_1, WINDOW_A); | ||||
bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, 130, &na_lam, WINDOW_A); | state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 130, &state->ps[no].na_lam, WINDOW_A); | ||||
VERIFY_CHECK(bits_na_1 <= 130); | VERIFY_CHECK(state->ps[no].bits_na_1 <= 130); | ||||
VERIFY_CHECK(bits_na_lam <= 130); | VERIFY_CHECK(state->ps[no].bits_na_lam <= 130); | ||||
bits = bits_na_1; | if (state->ps[no].bits_na_1 > bits) { | ||||
if (bits_na_lam > bits) { | bits = state->ps[no].bits_na_1; | ||||
bits = bits_na_lam; | } | ||||
if (state->ps[no].bits_na_lam > bits) { | |||||
bits = state->ps[no].bits_na_lam; | |||||
} | } | ||||
#else | #else | ||||
/* build wnaf representation for na. */ | /* build wnaf representation for na. */ | ||||
bits_na = secp256k1_ecmult_wnaf(wnaf_na, 256, na, WINDOW_A); | state->ps[no].bits_na = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na, 256, &na[np], WINDOW_A); | ||||
bits = bits_na; | if (state->ps[no].bits_na > bits) { | ||||
bits = state->ps[no].bits_na; | |||||
} | |||||
#endif | #endif | ||||
++no; | |||||
} | |||||
/* Calculate odd multiples of a. | /* Calculate odd multiples of a. | ||||
* All multiples are brought to the same Z 'denominator', which is stored | * All multiples are brought to the same Z 'denominator', which is stored | ||||
* in Z. Due to secp256k1' isomorphism we can do all operations pretending | * in Z. Due to secp256k1' isomorphism we can do all operations pretending | ||||
* that the Z coordinate was 1, use affine addition formulae, and correct | * that the Z coordinate was 1, use affine addition formulae, and correct | ||||
* the Z coordinate of the result once at the end. | * the Z coordinate of the result once at the end. | ||||
* The exception is the precomputed G table points, which are actually | * The exception is the precomputed G table points, which are actually | ||||
* affine. Compared to the base used for other points, they have a Z ratio | * affine. Compared to the base used for other points, they have a Z ratio | ||||
* of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same | * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same | ||||
* isomorphism to efficiently add with a known Z inverse. | * isomorphism to efficiently add with a known Z inverse. | ||||
*/ | */ | ||||
secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a); | if (no > 0) { | ||||
/* Compute the odd multiples in Jacobian form. */ | |||||
secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]); | |||||
for (np = 1; np < no; ++np) { | |||||
secp256k1_gej tmp = a[state->ps[np].input_pos]; | |||||
#ifdef VERIFY | |||||
secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z)); | |||||
#endif | |||||
secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z)); | |||||
secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp); | |||||
secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z)); | |||||
} | |||||
/* Bring them to the same Z denominator. */ | |||||
secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr); | |||||
} else { | |||||
secp256k1_fe_set_int(&Z, 1); | |||||
} | |||||
#ifdef USE_ENDOMORPHISM | #ifdef USE_ENDOMORPHISM | ||||
for (np = 0; np < no; ++np) { | |||||
for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { | for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) { | ||||
secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); | secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]); | ||||
} | |||||
} | } | ||||
if (ng) { | |||||
/* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */ | /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */ | ||||
secp256k1_scalar_split_128(&ng_1, &ng_128, ng); | secp256k1_scalar_split_128(&ng_1, &ng_128, ng); | ||||
/* Build wnaf representation for ng_1 and ng_128 */ | /* Build wnaf representation for ng_1 and ng_128 */ | ||||
bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G); | bits_ng_1 = secp256k1_ecmult_wnaf(wnaf_ng_1, 129, &ng_1, WINDOW_G); | ||||
bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G); | bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G); | ||||
if (bits_ng_1 > bits) { | if (bits_ng_1 > bits) { | ||||
bits = bits_ng_1; | bits = bits_ng_1; | ||||
} | } | ||||
if (bits_ng_128 > bits) { | if (bits_ng_128 > bits) { | ||||
bits = bits_ng_128; | bits = bits_ng_128; | ||||
} | } | ||||
} | |||||
#else | #else | ||||
if (ng) { | |||||
bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G); | bits_ng = secp256k1_ecmult_wnaf(wnaf_ng, 256, ng, WINDOW_G); | ||||
if (bits_ng > bits) { | if (bits_ng > bits) { | ||||
bits = bits_ng; | bits = bits_ng; | ||||
} | } | ||||
} | |||||
#endif | #endif | ||||
secp256k1_gej_set_infinity(r); | secp256k1_gej_set_infinity(r); | ||||
for (i = bits - 1; i >= 0; i--) { | for (i = bits - 1; i >= 0; i--) { | ||||
int n; | int n; | ||||
secp256k1_gej_double_var(r, r, NULL); | secp256k1_gej_double_var(r, r, NULL); | ||||
#ifdef USE_ENDOMORPHISM | #ifdef USE_ENDOMORPHISM | ||||
if (i < bits_na_1 && (n = wnaf_na_1[i])) { | for (np = 0; np < no; ++np) { | ||||
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); | if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) { | ||||
ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); | |||||
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | ||||
} | } | ||||
if (i < bits_na_lam && (n = wnaf_na_lam[i])) { | if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) { | ||||
ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A); | ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); | ||||
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | ||||
} | } | ||||
} | |||||
if (i < bits_ng_1 && (n = wnaf_ng_1[i])) { | if (i < bits_ng_1 && (n = wnaf_ng_1[i])) { | ||||
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); | ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); | ||||
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | ||||
} | } | ||||
if (i < bits_ng_128 && (n = wnaf_ng_128[i])) { | if (i < bits_ng_128 && (n = wnaf_ng_128[i])) { | ||||
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G); | ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g_128, n, WINDOW_G); | ||||
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | ||||
} | } | ||||
#else | #else | ||||
if (i < bits_na && (n = wnaf_na[i])) { | for (np = 0; np < no; ++np) { | ||||
ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A); | if (i < state->ps[np].bits_na && (n = state->ps[np].wnaf_na[i])) { | ||||
ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A); | |||||
secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | secp256k1_gej_add_ge_var(r, r, &tmpa, NULL); | ||||
} | } | ||||
} | |||||
if (i < bits_ng && (n = wnaf_ng[i])) { | if (i < bits_ng && (n = wnaf_ng[i])) { | ||||
ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); | ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G); | ||||
secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z); | ||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
if (!r->infinity) { | if (!r->infinity) { | ||||
secp256k1_fe_mul(&r->z, &r->z, &Z); | secp256k1_fe_mul(&r->z, &r->z, &Z); | ||||
} | } | ||||
} | } | ||||
static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) { | |||||
secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
struct secp256k1_strauss_point_state ps[1]; | |||||
#ifdef USE_ENDOMORPHISM | |||||
secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)]; | |||||
#endif | |||||
struct secp256k1_strauss_state state; | |||||
state.prej = prej; | |||||
state.zr = zr; | |||||
state.pre_a = pre_a; | |||||
#ifdef USE_ENDOMORPHISM | |||||
state.pre_a_lam = pre_a_lam; | |||||
#endif | |||||
state.ps = ps; | |||||
secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng); | |||||
} | |||||
static size_t secp256k1_strauss_scratch_size(size_t n_points) { | |||||
#ifdef USE_ENDOMORPHISM | |||||
static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); | |||||
#else | |||||
static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar); | |||||
#endif | |||||
return n_points*point_size; | |||||
} | |||||
static int secp256k1_ecmult_strauss_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { | |||||
secp256k1_gej* points; | |||||
secp256k1_scalar* scalars; | |||||
struct secp256k1_strauss_state state; | |||||
size_t i; | |||||
secp256k1_gej_set_infinity(r); | |||||
if (inp_g_sc == NULL && n_points == 0) { | |||||
return 1; | |||||
} | |||||
if (!secp256k1_scratch_resize(scratch, secp256k1_strauss_scratch_size(n_points), STRAUSS_SCRATCH_OBJECTS)) { | |||||
return 0; | |||||
} | |||||
secp256k1_scratch_reset(scratch); | |||||
points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_gej)); | |||||
scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_scalar)); | |||||
state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej)); | |||||
state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe)); | |||||
#ifdef USE_ENDOMORPHISM | |||||
state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); | |||||
state.pre_a_lam = state.pre_a + n_points * ECMULT_TABLE_SIZE(WINDOW_A); | |||||
#else | |||||
state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge)); | |||||
#endif | |||||
state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, n_points * sizeof(struct secp256k1_strauss_point_state)); | |||||
for (i = 0; i < n_points; i++) { | |||||
secp256k1_ge point; | |||||
if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) return 0; | |||||
secp256k1_gej_set_ge(&points[i], &point); | |||||
} | |||||
secp256k1_ecmult_strauss_wnaf(ctx, &state, r, n_points, points, scalars, inp_g_sc); | |||||
return 1; | |||||
} | |||||
/* Wrapper for secp256k1_ecmult_multi_func interface */ | |||||
static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { | |||||
return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); | |||||
} | |||||
static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) { | |||||
return secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1); | |||||
} | |||||
/** Convert a number to WNAF notation. | |||||
* The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val. | |||||
* It has the following guarantees: | |||||
* - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w) | |||||
* - the number of words set is always WNAF_SIZE(w) | |||||
* - the returned skew is 0 without endomorphism, or 0 or 1 with endomorphism | |||||
*/ | |||||
static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) { | |||||
int sign = 0; | |||||
int skew = 0; | |||||
int pos = 1; | |||||
#ifndef USE_ENDOMORPHISM | |||||
secp256k1_scalar neg_s; | |||||
#endif | |||||
const secp256k1_scalar *work = s; | |||||
if (secp256k1_scalar_is_zero(s)) { | |||||
while (pos * w < WNAF_BITS) { | |||||
wnaf[pos] = 0; | |||||
++pos; | |||||
} | |||||
return 0; | |||||
} | |||||
if (secp256k1_scalar_is_even(s)) { | |||||
#ifdef USE_ENDOMORPHISM | |||||
skew = 1; | |||||
#else | |||||
secp256k1_scalar_negate(&neg_s, s); | |||||
work = &neg_s; | |||||
sign = -1; | |||||
#endif | |||||
} | |||||
wnaf[0] = (secp256k1_scalar_get_bits_var(work, 0, w) + skew + sign) ^ sign; | |||||
while (pos * w < WNAF_BITS) { | |||||
int now = w; | |||||
int val; | |||||
if (now + pos * w > WNAF_BITS) { | |||||
now = WNAF_BITS - pos * w; | |||||
} | |||||
val = secp256k1_scalar_get_bits_var(work, pos * w, now); | |||||
if ((val & 1) == 0) { | |||||
wnaf[pos - 1] -= ((1 << w) + sign) ^ sign; | |||||
wnaf[pos] = (val + 1 + sign) ^ sign; | |||||
} else { | |||||
wnaf[pos] = (val + sign) ^ sign; | |||||
} | |||||
++pos; | |||||
} | |||||
VERIFY_CHECK(pos == WNAF_SIZE(w)); | |||||
return skew; | |||||
} | |||||
struct secp256k1_pippenger_point_state { | |||||
int skew_na; | |||||
size_t input_pos; | |||||
}; | |||||
struct secp256k1_pippenger_state { | |||||
int *wnaf_na; | |||||
struct secp256k1_pippenger_point_state* ps; | |||||
}; | |||||
/* | |||||
* pippenger_wnaf computes the result of a multi-point multiplication as | |||||
* follows: The scalars are brought into wnaf with n_wnaf elements each. Then | |||||
* for every i < n_wnaf, first each point is added to a "bucket" corresponding | |||||
* to the point's wnaf[i]. Second, the buckets are added together such that | |||||
* r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ... | |||||
*/ | |||||
static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, const secp256k1_scalar *sc, const secp256k1_ge *pt, size_t num) { | |||||
size_t n_wnaf = WNAF_SIZE(bucket_window+1); | |||||
size_t np; | |||||
size_t no = 0; | |||||
int i; | |||||
int j; | |||||
for (np = 0; np < num; ++np) { | |||||
if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) { | |||||
continue; | |||||
} | |||||
state->ps[no].input_pos = np; | |||||
state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1); | |||||
no++; | |||||
} | |||||
secp256k1_gej_set_infinity(r); | |||||
if (no == 0) { | |||||
return 1; | |||||
} | |||||
for (i = n_wnaf - 1; i >= 0; i--) { | |||||
secp256k1_gej running_sum; | |||||
for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) { | |||||
secp256k1_gej_set_infinity(&buckets[j]); | |||||
} | |||||
for (np = 0; np < no; ++np) { | |||||
int n = state->wnaf_na[np*n_wnaf + i]; | |||||
struct secp256k1_pippenger_point_state point_state = state->ps[np]; | |||||
secp256k1_ge tmp; | |||||
int idx; | |||||
#ifdef USE_ENDOMORPHISM | |||||
if (i == 0) { | |||||
/* correct for wnaf skew */ | |||||
int skew = point_state.skew_na; | |||||
if (skew) { | |||||
secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]); | |||||
secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL); | |||||
} | |||||
} | |||||
#endif | |||||
if (n > 0) { | |||||
idx = (n - 1)/2; | |||||
secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL); | |||||
} else if (n < 0) { | |||||
idx = -(n + 1)/2; | |||||
secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]); | |||||
secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL); | |||||
} | |||||
} | |||||
for(j = 0; j < bucket_window; j++) { | |||||
secp256k1_gej_double_var(r, r, NULL); | |||||
} | |||||
secp256k1_gej_set_infinity(&running_sum); | |||||
/* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ... | |||||
* = bucket[0] + bucket[1] + bucket[2] + bucket[3] + ... | |||||
* + 2 * (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...) | |||||
* using an intermediate running sum: | |||||
* running_sum = bucket[0] + bucket[1] + bucket[2] + ... | |||||
* | |||||
* The doubling is done implicitly by deferring the final window doubling (of 'r'). | |||||
*/ | |||||
for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) { | |||||
secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL); | |||||
secp256k1_gej_add_var(r, r, &running_sum, NULL); | |||||
} | |||||
secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL); | |||||
secp256k1_gej_double_var(r, r, NULL); | |||||
secp256k1_gej_add_var(r, r, &running_sum, NULL); | |||||
} | |||||
return 1; | |||||
} | |||||
/** | |||||
* Returns optimal bucket_window (number of bits of a scalar represented by a | |||||
* set of buckets) for a given number of points. | |||||
*/ | |||||
static int secp256k1_pippenger_bucket_window(size_t n) { | |||||
#ifdef USE_ENDOMORPHISM | |||||
if (n <= 1) { | |||||
return 1; | |||||
} else if (n <= 4) { | |||||
return 2; | |||||
} else if (n <= 20) { | |||||
return 3; | |||||
} else if (n <= 57) { | |||||
return 4; | |||||
} else if (n <= 136) { | |||||
return 5; | |||||
} else if (n <= 235) { | |||||
return 6; | |||||
} else if (n <= 1260) { | |||||
return 7; | |||||
} else if (n <= 4420) { | |||||
return 9; | |||||
} else if (n <= 7880) { | |||||
return 10; | |||||
} else if (n <= 16050) { | |||||
return 11; | |||||
} else { | |||||
return PIPPENGER_MAX_BUCKET_WINDOW; | |||||
} | |||||
#else | |||||
if (n <= 1) { | |||||
return 1; | |||||
} else if (n <= 11) { | |||||
return 2; | |||||
} else if (n <= 45) { | |||||
return 3; | |||||
} else if (n <= 100) { | |||||
return 4; | |||||
} else if (n <= 275) { | |||||
return 5; | |||||
} else if (n <= 625) { | |||||
return 6; | |||||
} else if (n <= 1850) { | |||||
return 7; | |||||
} else if (n <= 3400) { | |||||
return 8; | |||||
} else if (n <= 9630) { | |||||
return 9; | |||||
} else if (n <= 17900) { | |||||
return 10; | |||||
} else if (n <= 32800) { | |||||
return 11; | |||||
} else { | |||||
return PIPPENGER_MAX_BUCKET_WINDOW; | |||||
} | |||||
#endif | |||||
} | |||||
/** | |||||
* Returns the maximum optimal number of points for a bucket_window. | |||||
*/ | |||||
static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) { | |||||
switch(bucket_window) { | |||||
#ifdef USE_ENDOMORPHISM | |||||
case 1: return 1; | |||||
case 2: return 4; | |||||
case 3: return 20; | |||||
case 4: return 57; | |||||
case 5: return 136; | |||||
case 6: return 235; | |||||
case 7: return 1260; | |||||
case 8: return 1260; | |||||
case 9: return 4420; | |||||
case 10: return 7880; | |||||
case 11: return 16050; | |||||
case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; | |||||
#else | |||||
case 1: return 1; | |||||
case 2: return 11; | |||||
case 3: return 45; | |||||
case 4: return 100; | |||||
case 5: return 275; | |||||
case 6: return 625; | |||||
case 7: return 1850; | |||||
case 8: return 3400; | |||||
case 9: return 9630; | |||||
case 10: return 17900; | |||||
case 11: return 32800; | |||||
case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; | |||||
#endif | |||||
} | |||||
return 0; | |||||
} | |||||
#ifdef USE_ENDOMORPHISM | |||||
SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) { | |||||
secp256k1_scalar tmp = *s1; | |||||
secp256k1_scalar_split_lambda(s1, s2, &tmp); | |||||
secp256k1_ge_mul_lambda(p2, p1); | |||||
if (secp256k1_scalar_is_high(s1)) { | |||||
secp256k1_scalar_negate(s1, s1); | |||||
secp256k1_ge_neg(p1, p1); | |||||
} | |||||
if (secp256k1_scalar_is_high(s2)) { | |||||
secp256k1_scalar_negate(s2, s2); | |||||
secp256k1_ge_neg(p2, p2); | |||||
} | |||||
} | |||||
#endif | |||||
/** | |||||
* Returns the scratch size required for a given number of points (excluding | |||||
* base point G) without considering alignment. | |||||
*/ | |||||
static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) { | |||||
#ifdef USE_ENDOMORPHISM | |||||
size_t entries = 2*n_points + 2; | |||||
#else | |||||
size_t entries = n_points + 1; | |||||
#endif | |||||
size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); | |||||
return ((1<<bucket_window) * sizeof(secp256k1_gej) + sizeof(struct secp256k1_pippenger_state) + entries * entry_size); | |||||
} | |||||
static int secp256k1_ecmult_pippenger_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) { | |||||
/* Use 2(n+1) with the endomorphism, n+1 without, when calculating batch | |||||
* sizes. The reason for +1 is that we add the G scalar to the list of | |||||
* other scalars. */ | |||||
#ifdef USE_ENDOMORPHISM | |||||
size_t entries = 2*n_points + 2; | |||||
#else | |||||
size_t entries = n_points + 1; | |||||
#endif | |||||
secp256k1_ge *points; | |||||
secp256k1_scalar *scalars; | |||||
secp256k1_gej *buckets; | |||||
struct secp256k1_pippenger_state *state_space; | |||||
size_t idx = 0; | |||||
size_t point_idx = 0; | |||||
int i, j; | |||||
int bucket_window; | |||||
(void)ctx; | |||||
secp256k1_gej_set_infinity(r); | |||||
if (inp_g_sc == NULL && n_points == 0) { | |||||
return 1; | |||||
} | |||||
bucket_window = secp256k1_pippenger_bucket_window(n_points); | |||||
if (!secp256k1_scratch_resize(scratch, secp256k1_pippenger_scratch_size(n_points, bucket_window), PIPPENGER_SCRATCH_OBJECTS)) { | |||||
return 0; | |||||
} | |||||
secp256k1_scratch_reset(scratch); | |||||
points = (secp256k1_ge *) secp256k1_scratch_alloc(scratch, entries * sizeof(*points)); | |||||
scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(scratch, entries * sizeof(*scalars)); | |||||
state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(scratch, sizeof(*state_space)); | |||||
state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(scratch, entries * sizeof(*state_space->ps)); | |||||
state_space->wnaf_na = (int *) secp256k1_scratch_alloc(scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int)); | |||||
buckets = (secp256k1_gej *) secp256k1_scratch_alloc(scratch, (1<<bucket_window) * sizeof(*buckets)); | |||||
if (inp_g_sc != NULL) { | |||||
scalars[0] = *inp_g_sc; | |||||
points[0] = secp256k1_ge_const_g; | |||||
idx++; | |||||
#ifdef USE_ENDOMORPHISM | |||||
secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]); | |||||
idx++; | |||||
#endif | |||||
} | |||||
while (point_idx < n_points) { | |||||
if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) { | |||||
return 0; | |||||
} | |||||
idx++; | |||||
#ifdef USE_ENDOMORPHISM | |||||
secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]); | |||||
idx++; | |||||
#endif | |||||
point_idx++; | |||||
} | |||||
secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx); | |||||
/* Clear data */ | |||||
for(i = 0; (size_t)i < idx; i++) { | |||||
secp256k1_scalar_clear(&scalars[i]); | |||||
state_space->ps[i].skew_na = 0; | |||||
for(j = 0; j < WNAF_SIZE(bucket_window+1); j++) { | |||||
state_space->wnaf_na[i * WNAF_SIZE(bucket_window+1) + j] = 0; | |||||
} | |||||
} | |||||
for(i = 0; i < 1<<bucket_window; i++) { | |||||
secp256k1_gej_clear(&buckets[i]); | |||||
} | |||||
return 1; | |||||
} | |||||
/* Wrapper for secp256k1_ecmult_multi_func interface */ | |||||
static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { | |||||
return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0); | |||||
} | |||||
/** | |||||
* Returns the maximum number of points in addition to G that can be used with | |||||
* a given scratch space. The function ensures that fewer points may also be | |||||
* used. | |||||
*/ | |||||
static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) { | |||||
size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS); | |||||
int bucket_window; | |||||
size_t res = 0; | |||||
for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) { | |||||
size_t n_points; | |||||
size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window); | |||||
size_t space_for_points; | |||||
size_t space_overhead; | |||||
size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int); | |||||
#ifdef USE_ENDOMORPHISM | |||||
entry_size = 2*entry_size; | |||||
#endif | |||||
space_overhead = ((1<<bucket_window) * sizeof(secp256k1_gej) + entry_size + sizeof(struct secp256k1_pippenger_state)); | |||||
if (space_overhead > max_alloc) { | |||||
break; | |||||
} | |||||
space_for_points = max_alloc - space_overhead; | |||||
n_points = space_for_points/entry_size; | |||||
n_points = n_points > max_points ? max_points : n_points; | |||||
if (n_points > res) { | |||||
res = n_points; | |||||
} | |||||
if (n_points < max_points) { | |||||
/* A larger bucket_window may support even more points. But if we | |||||
* would choose that then the caller couldn't safely use any number | |||||
* smaller than what this function returns */ | |||||
break; | |||||
} | |||||
} | |||||
return res; | |||||
} | |||||
typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t); | |||||
static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) { | |||||
size_t i; | |||||
int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t); | |||||
size_t max_points; | |||||
size_t n_batches; | |||||
size_t n_batch_points; | |||||
secp256k1_gej_set_infinity(r); | |||||
if (inp_g_sc == NULL && n == 0) { | |||||
return 1; | |||||
} else if (n == 0) { | |||||
secp256k1_scalar szero; | |||||
secp256k1_scalar_set_int(&szero, 0); | |||||
secp256k1_ecmult(ctx, r, r, &szero, inp_g_sc); | |||||
return 1; | |||||
} | |||||
max_points = secp256k1_pippenger_max_points(scratch); | |||||
if (max_points == 0) { | |||||
return 0; | |||||
} else if (max_points > ECMULT_MAX_POINTS_PER_BATCH) { | |||||
max_points = ECMULT_MAX_POINTS_PER_BATCH; | |||||
} | |||||
n_batches = (n+max_points-1)/max_points; | |||||
n_batch_points = (n+n_batches-1)/n_batches; | |||||
if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) { | |||||
f = secp256k1_ecmult_pippenger_batch; | |||||
} else { | |||||
max_points = secp256k1_strauss_max_points(scratch); | |||||
if (max_points == 0) { | |||||
return 0; | |||||
} | |||||
n_batches = (n+max_points-1)/max_points; | |||||
n_batch_points = (n+n_batches-1)/n_batches; | |||||
f = secp256k1_ecmult_strauss_batch; | |||||
} | |||||
for(i = 0; i < n_batches; i++) { | |||||
size_t nbp = n < n_batch_points ? n : n_batch_points; | |||||
size_t offset = n_batch_points*i; | |||||
secp256k1_gej tmp; | |||||
if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { | |||||
return 0; | |||||
} | |||||
secp256k1_gej_add_var(r, r, &tmp, NULL); | |||||
n -= nbp; | |||||
} | |||||
return 1; | |||||
} | |||||
#endif /* SECP256K1_ECMULT_IMPL_H */ | #endif /* SECP256K1_ECMULT_IMPL_H */ |