diff --git a/src/secp256k1/src/bench_internal.c b/src/secp256k1/src/bench_internal.c
--- a/src/secp256k1/src/bench_internal.c
+++ b/src/secp256k1/src/bench_internal.c
@@ -251,7 +251,7 @@
     bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
-        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A);
+        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A, 256);
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
     }
 }
diff --git a/src/secp256k1/src/ecmult_const.h b/src/secp256k1/src/ecmult_const.h
--- a/src/secp256k1/src/ecmult_const.h
+++ b/src/secp256k1/src/ecmult_const.h
@@ -10,6 +10,8 @@
 #include "scalar.h"
 #include "group.h"
 
-static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q);
+/* Here `bits` should be set to the maximum bitlength of the _absolute value_ of `q`, plus
+ * one because we internally sometimes add 2 to the number during the WNAF conversion. */
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q, int bits);
 
 #endif /* SECP256K1_ECMULT_CONST_H */
diff --git a/src/secp256k1/src/ecmult_const_impl.h b/src/secp256k1/src/ecmult_const_impl.h
--- a/src/secp256k1/src/ecmult_const_impl.h
+++ b/src/secp256k1/src/ecmult_const_impl.h
@@ -48,7 +48,7 @@
  *
  *  Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
  */
-static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w) {
+static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w, int size) {
     int global_sign;
     int skew = 0;
     int word = 0;
@@ -67,9 +67,14 @@
      * and we'd lose any performance benefit. Instead, we use a technique from
      * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even)
      * or 2 (for odd) to the number we are encoding, returning a skew value indicating
-     * this, and having the caller compensate after doing the multiplication. */
-
-    /* Negative numbers will be negated to keep their bit representation below the maximum width */
+     * this, and having the caller compensate after doing the multiplication.
+     *
+     * In fact, we _do_ want to negate numbers to minimize their bit-lengths (and in
+     * particular, to ensure that the outputs from the endomorphism-split fit into
+     * 128 bits). If we negate, the parity of our number flips, inverting which of
+     * {1, 2} we want to add to the scalar when ensuring that it's odd. Further
+     * complicating things, -1 interacts badly with `secp256k1_scalar_cadd_bit` and
+     * we need to special-case it in this logic. */
     flip = secp256k1_scalar_is_high(&s);
     /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
     bit = flip ^ !secp256k1_scalar_is_even(&s);
@@ -88,7 +93,7 @@
 
     /* 4 */
     u_last = secp256k1_scalar_shr_int(&s, w);
-    while (word * w < WNAF_BITS) {
+    while (word * w < size) {
         int sign;
         int even;
 
@@ -108,37 +113,44 @@
     wnaf[word] = u * global_sign;
 
     VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
-    VERIFY_CHECK(word == WNAF_SIZE(w));
+    VERIFY_CHECK(word == WNAF_SIZE_BITS(size, w));
     return skew;
 }
 
-
-static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *scalar) {
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *scalar, int size) {
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ge tmpa;
     secp256k1_fe Z;
 
     int skew_1;
-    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
 #ifdef USE_ENDOMORPHISM
     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
     int skew_lam;
     secp256k1_scalar q_1, q_lam;
 #endif
+    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
 
     int i;
     secp256k1_scalar sc = *scalar;
 
     /* build wnaf representation for q. */
+    int rsize = size;
 #ifdef USE_ENDOMORPHISM
-    /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
-    secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
-    skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1);
-    skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1);
-#else
-    skew_1   = secp256k1_wnaf_const(wnaf_1, sc, WINDOW_A - 1);
+    if (size > 128) {
+        rsize = 128;
+        /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
+        secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
+        skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1, 128);
+        skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1, 128);
+    } else
 #endif
+    {
+        skew_1   = secp256k1_wnaf_const(wnaf_1, sc, WINDOW_A - 1, size);
+#ifdef USE_ENDOMORPHISM
+        skew_lam = 0;
+#endif
+    }
 
     /* Calculate odd multiples of a.
      * All multiples are brought to the same Z 'denominator', which is stored
@@ -152,26 +164,30 @@
         secp256k1_fe_normalize_weak(&pre_a[i].y);
     }
 #ifdef USE_ENDOMORPHISM
-    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
-        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    if (size > 128) {
+        for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+            secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+        }
     }
 #endif
 
     /* first loop iteration (separated out so we can directly set r, rather
      * than having it start at infinity, get doubled several times, then have
      * its new value added to it) */
-    i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)];
+    i = wnaf_1[WNAF_SIZE_BITS(rsize, WINDOW_A - 1)];
     VERIFY_CHECK(i != 0);
     ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
     secp256k1_gej_set_ge(r, &tmpa);
 #ifdef USE_ENDOMORPHISM
-    i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)];
-    VERIFY_CHECK(i != 0);
-    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
-    secp256k1_gej_add_ge(r, r, &tmpa);
+    if (size > 128) {
+        i = wnaf_lam[WNAF_SIZE_BITS(rsize, WINDOW_A - 1)];
+        VERIFY_CHECK(i != 0);
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+    }
 #endif
     /* remaining loop iterations */
-    for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
+    for (i = WNAF_SIZE_BITS(rsize, WINDOW_A - 1) - 1; i >= 0; i--) {
         int n;
         int j;
         for (j = 0; j < WINDOW_A - 1; ++j) {
@@ -183,10 +199,12 @@
         VERIFY_CHECK(n != 0);
         secp256k1_gej_add_ge(r, r, &tmpa);
 #ifdef USE_ENDOMORPHISM
-        n = wnaf_lam[i];
-        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
-        VERIFY_CHECK(n != 0);
-        secp256k1_gej_add_ge(r, r, &tmpa);
+        if (size > 128) {
+            n = wnaf_lam[i];
+            ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
+            VERIFY_CHECK(n != 0);
+            secp256k1_gej_add_ge(r, r, &tmpa);
+        }
 #endif
     }
 
@@ -206,14 +224,18 @@
         secp256k1_ge_set_gej(&correction, &tmpj);
         secp256k1_ge_to_storage(&correction_1_stor, a);
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_to_storage(&correction_lam_stor, a);
+        if (size > 128) {
+            secp256k1_ge_to_storage(&correction_lam_stor, a);
+        }
 #endif
         secp256k1_ge_to_storage(&a2_stor, &correction);
 
         /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
         secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+        if (size > 128) {
+            secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+        }
 #endif
 
         /* Apply the correction */
@@ -222,10 +244,12 @@
         secp256k1_gej_add_ge(r, r, &correction);
 
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_from_storage(&correction, &correction_lam_stor);
-        secp256k1_ge_neg(&correction, &correction);
-        secp256k1_ge_mul_lambda(&correction, &correction);
-        secp256k1_gej_add_ge(r, r, &correction);
+        if (size > 128) {
+            secp256k1_ge_from_storage(&correction, &correction_lam_stor);
+            secp256k1_ge_neg(&correction, &correction);
+            secp256k1_ge_mul_lambda(&correction, &correction);
+            secp256k1_gej_add_ge(r, r, &correction);
+        }
 #endif
     }
 }
diff --git a/src/secp256k1/src/ecmult_impl.h b/src/secp256k1/src/ecmult_impl.h
--- a/src/secp256k1/src/ecmult_impl.h
+++ b/src/secp256k1/src/ecmult_impl.h
@@ -47,7 +47,8 @@
 #else
     #define WNAF_BITS 256
 #endif
-#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
+#define WNAF_SIZE_BITS(bits, w) (((bits) + (w) - 1) / (w))
+#define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w)
 
 /** The number of entries a table with precomputed multiples needs to have. */
 #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
diff --git a/src/secp256k1/src/modules/ecdh/main_impl.h b/src/secp256k1/src/modules/ecdh/main_impl.h
--- a/src/secp256k1/src/modules/ecdh/main_impl.h
+++ b/src/secp256k1/src/modules/ecdh/main_impl.h
@@ -30,7 +30,7 @@
         unsigned char y[1];
         secp256k1_sha256 sha;
 
-        secp256k1_ecmult_const(&res, &pt, &s);
+        secp256k1_ecmult_const(&res, &pt, &s, 256);
         secp256k1_ge_set_gej(&pt, &res);
         /* Compute a hash of the point in compressed form
          * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not
diff --git a/src/secp256k1/src/tests.c b/src/secp256k1/src/tests.c
--- a/src/secp256k1/src/tests.c
+++ b/src/secp256k1/src/tests.c
@@ -2443,7 +2443,7 @@
         0xb84e4e1b, 0xfb77e21f, 0x96baae2a, 0x63dec956
     );
     secp256k1_gej b;
-    secp256k1_ecmult_const(&b, &a, &xn);
+    secp256k1_ecmult_const(&b, &a, &xn, 256);
 
     CHECK(secp256k1_ge_is_valid_var(&a));
     ge_equals_gej(&expected_b, &b);
@@ -2459,12 +2459,12 @@
     random_scalar_order_test(&a);
     random_scalar_order_test(&b);
 
-    secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a);
-    secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b);
+    secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a, 256);
+    secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b, 256);
     secp256k1_ge_set_gej(&mid1, &res1);
     secp256k1_ge_set_gej(&mid2, &res2);
-    secp256k1_ecmult_const(&res1, &mid1, &b);
-    secp256k1_ecmult_const(&res2, &mid2, &a);
+    secp256k1_ecmult_const(&res1, &mid1, &b, 256);
+    secp256k1_ecmult_const(&res2, &mid2, &a, 256);
     secp256k1_ge_set_gej(&mid1, &res1);
     secp256k1_ge_set_gej(&mid2, &res2);
     ge_equals_ge(&mid1, &mid2);
@@ -2480,13 +2480,13 @@
     secp256k1_scalar_negate(&negone, &one);
 
     random_group_element_test(&point);
-    secp256k1_ecmult_const(&res1, &point, &zero);
+    secp256k1_ecmult_const(&res1, &point, &zero, 3);
     secp256k1_ge_set_gej(&res2, &res1);
     CHECK(secp256k1_ge_is_infinity(&res2));
-    secp256k1_ecmult_const(&res1, &point, &one);
+    secp256k1_ecmult_const(&res1, &point, &one, 2);
     secp256k1_ge_set_gej(&res2, &res1);
     ge_equals_ge(&res2, &point);
-    secp256k1_ecmult_const(&res1, &point, &negone);
+    secp256k1_ecmult_const(&res1, &point, &negone, 256);
     secp256k1_gej_neg(&res1, &res1);
     secp256k1_ge_set_gej(&res2, &res1);
     ge_equals_ge(&res2, &point);
@@ -2512,7 +2512,7 @@
     for (i = 0; i < 100; ++i) {
         secp256k1_ge tmp;
         secp256k1_ge_set_gej(&tmp, &point);
-        secp256k1_ecmult_const(&point, &tmp, &scalar);
+        secp256k1_ecmult_const(&point, &tmp, &scalar, 256);
     }
     secp256k1_ge_set_gej(&res, &point);
     ge_equals_gej(&res, &expected_point);
@@ -2968,6 +2968,7 @@
     int wnaf[256] = {0};
     int i;
     int skew;
+    int bits = 256;
     secp256k1_scalar num = *number;
 
     secp256k1_scalar_set_int(&x, 0);
@@ -2977,10 +2978,11 @@
     for (i = 0; i < 16; ++i) {
         secp256k1_scalar_shr_int(&num, 8);
     }
+    bits = 128;
 #endif
-    skew = secp256k1_wnaf_const(wnaf, num, w);
+    skew = secp256k1_wnaf_const(wnaf, num, w, bits);
 
-    for (i = WNAF_SIZE(w); i >= 0; --i) {
+    for (i = WNAF_SIZE_BITS(bits, w); i >= 0; --i) {
         secp256k1_scalar t;
         int v = wnaf[i];
         CHECK(v != 0); /* check nonzero */
diff --git a/src/secp256k1/src/tests_exhaustive.c b/src/secp256k1/src/tests_exhaustive.c
--- a/src/secp256k1/src/tests_exhaustive.c
+++ b/src/secp256k1/src/tests_exhaustive.c
@@ -174,7 +174,7 @@
                 ge_equals_gej(&group[(i * r_log + j) % order], &tmp);
 
                 if (i > 0) {
-                    secp256k1_ecmult_const(&tmp, &group[i], &ng);
+                    secp256k1_ecmult_const(&tmp, &group[i], &ng, 256);
                     ge_equals_gej(&group[(i * j) % order], &tmp);
                 }
             }