diff --git a/src/secp256k1/sage/group_prover.sage b/src/secp256k1/sage/group_prover.sage
index 5198724be..8521f0799 100644
--- a/src/secp256k1/sage/group_prover.sage
+++ b/src/secp256k1/sage/group_prover.sage
@@ -1,322 +1,322 @@
 # This code supports verifying group implementations which have branches
 # or conditional statements (like cmovs), by allowing each execution path
 # to independently set assumptions on input or intermediary variables.
 #
 # The general approach is:
 # * A constraint is a tuple of two sets of symbolic expressions:
 #   the first of which are required to evaluate to zero, the second of which
 #   are required to evaluate to nonzero.
 #   - A constraint is said to be conflicting if any of its nonzero expressions
 #     is in the ideal with basis the zero expressions (in other words: when the
-#     zero expressions imply that one of the nonzero expressions are zero).
+#     zero expressions imply that one of the nonzero expressions is zero).
 # * There is a list of laws that describe the intended behaviour, including
 #   laws for addition and doubling. Each law is called with the symbolic point
 #   coordinates as arguments, and returns:
 #   - A constraint describing the assumptions under which it is applicable,
 #     called "assumeLaw"
 #   - A constraint describing the requirements of the law, called "require"
-# * Implementations are transliterated into functions that operate as well on
-#   algebraic input points, and are called once per combination of branches
-#   exectured. Each execution returns:
+# * Implementations are transliterated into functions that also operate on
+#   algebraic input points, and are called once per combination of branches
+#   executed. Each execution returns:
 #   - A constraint describing the assumptions this implementation requires
 #     (such as Z1=1), called "assumeFormula"
 #   - A constraint describing the assumptions this specific branch requires,
 #     but which is by construction guaranteed to cover the entire space by
 #     merging the results from all branches, called "assumeBranch"
 #   - The result of the computation
 # * All combinations of laws with implementation branches are tried, and:
 #   - If the combination of assumeLaw, assumeFormula, and assumeBranch results
 #     in a conflict, it means this law does not apply to this branch, and it is
 #     skipped.
 #   - For others, we try to prove the require constraints hold, assuming the
 #     information in assumeLaw + assumeFormula + assumeBranch, and if this does
 #     not succeed, we fail.
 #     + To prove an expression is zero, we check whether it belongs to the
 #       ideal with the assumed zero expressions as basis. This test is exact.
 #     + To prove an expression is nonzero, we check whether each of its
 #       factors is contained in the set of nonzero assumptions' factors.
 #       This test is not exact, so various combinations of original and
 #       reduced expressions' factors are tried.
 #   - If we succeed, we print out the assumptions from assumeFormula that
 #     weren't implied by assumeLaw already. Those from assumeBranch are skipped,
 #     as we assume that all constraints in it are complementary with each other.
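+#
+# As a toy example of both tests, in a Sage session over the ring QQ[x,y]
+# (illustrative only; x and y stand in for the symbolic coordinates):
+#
+#   R.<x,y> = PolynomialRing(QQ)
+#   (x^2 - y^2) in R.ideal([x - y])  # True: x = y implies x^2 - y^2 = 0,
+#                                    # so the (exact) zero test succeeds
+#   I = R.ideal([x*y])               # assume x*y == 0; then assuming both
+#   (x*y) in I                       # x != 0 and y != 0 is a conflict,
+#                                    # since their product lies in I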
 #
 # Based on the sage verification scripts used in the Explicit-Formulas Database
 # by Tanja Lange and others, see http://hyperelliptic.org/EFD
 
 class fastfrac:
   """Fractions over rings."""
 
   def __init__(self,R,top,bot=1):
-    """Construct a fractional, given a ring, a numerator, and denominator."""
+    """Construct a fraction, given a ring, a numerator, and a denominator."""
     self.R = R
     if parent(top) == ZZ or parent(top) == R:
       self.top = R(top)
       self.bot = R(bot)
     elif top.__class__ == fastfrac:
       self.top = top.top
       self.bot = top.bot * bot
     else:
       self.top = R(numerator(top))
       self.bot = R(denominator(top)) * bot
 
   def iszero(self,I):
     """Return whether this fraction is zero given an ideal."""
     return self.top in I and self.bot not in I
 
   def reduce(self,assumeZero):
     zero = self.R.ideal(map(numerator, assumeZero))
     return fastfrac(self.R, zero.reduce(self.top)) / fastfrac(self.R, zero.reduce(self.bot))
 
   def __add__(self,other):
     """Add two fractions."""
     if parent(other) == ZZ:
       return fastfrac(self.R,self.top + self.bot * other,self.bot)
     if other.__class__ == fastfrac:
       return fastfrac(self.R,self.top * other.bot + self.bot * other.top,self.bot * other.bot)
     return NotImplemented
 
   def __sub__(self,other):
     """Subtract two fractions."""
     if parent(other) == ZZ:
       return fastfrac(self.R,self.top - self.bot * other,self.bot)
     if other.__class__ == fastfrac:
       return fastfrac(self.R,self.top * other.bot - self.bot * other.top,self.bot * other.bot)
     return NotImplemented
 
   def __neg__(self):
     """Return the negation of a fraction."""
     return fastfrac(self.R,-self.top,self.bot)
 
   def __mul__(self,other):
     """Multiply two fractions."""
     if parent(other) == ZZ:
       return fastfrac(self.R,self.top * other,self.bot)
     if other.__class__ == fastfrac:
       return fastfrac(self.R,self.top * other.top,self.bot * other.bot)
     return NotImplemented
 
   def __rmul__(self,other):
     """Multiply something else with a fraction."""
     return self.__mul__(other)
 
   def __div__(self,other):
     """Divide two fractions."""
     if parent(other) == ZZ:
       return fastfrac(self.R,self.top,self.bot * other)
     if other.__class__ == fastfrac:
       return fastfrac(self.R,self.top * other.bot,self.bot * other.top)
     return NotImplemented
 
   def __pow__(self,other):
     """Compute a power of a fraction."""
     if parent(other) == ZZ:
       if other < 0:
         # Negative powers require flipping top and bottom
         return fastfrac(self.R,self.bot ^ (-other),self.top ^ (-other))
       else:
         return fastfrac(self.R,self.top ^ other,self.bot ^ other)
     return NotImplemented
 
   def __str__(self):
     return "fastfrac((" + str(self.top) + ") / (" + str(self.bot) + "))"
 
   def __repr__(self):
     return "%s" % self
 
   def numerator(self):
     return self.top
 
 class constraints:
   """A set of constraints, consisting of zero and nonzero expressions.
 
   Constraints can either be used to express knowledge or a requirement.
 
   Both the fields zero and nonzero are maps from expressions to description
   strings. The expressions that are the keys in zero are required to be zero,
   and the expressions that are the keys in nonzero are required to be nonzero.
 
   Note that (a != 0) and (b != 0) is the same as (a*b != 0), so all keys in
   nonzero could be multiplied into a single key. This is often much less
   efficient to work with though, so we keep them separate inside the
   constraints. This allows higher-level code to do fast checks on the
   individual nonzero elements, or combine them if needed for stronger checks.
 
   We can't multiply the different zero elements, as it would suffice for one
   of the factors to be zero, instead of all of them. Instead, the zero
   elements are typically combined into an ideal first.
""" def __init__(self, **kwargs): if 'zero' in kwargs: self.zero = dict(kwargs['zero']) else: self.zero = dict() if 'nonzero' in kwargs: self.nonzero = dict(kwargs['nonzero']) else: self.nonzero = dict() def negate(self): return constraints(zero=self.nonzero, nonzero=self.zero) def __add__(self, other): zero = self.zero.copy() zero.update(other.zero) nonzero = self.nonzero.copy() nonzero.update(other.nonzero) return constraints(zero=zero, nonzero=nonzero) def __str__(self): return "constraints(zero=%s,nonzero=%s)" % (self.zero, self.nonzero) def __repr__(self): return "%s" % self def conflicts(R, con): """Check whether any of the passed non-zero assumptions is implied by the zero assumptions""" zero = R.ideal(map(numerator, con.zero)) if 1 in zero: return True # First a cheap check whether any of the individual nonzero terms conflict on # their own. for nonzero in con.nonzero: if nonzero.iszero(zero): return True # It can be the case that entries in the nonzero set do not individually # conflict with the zero set, but their combination does. For example, knowing # that either x or y is zero is equivalent to having x*y in the zero set. # Having x or y individually in the nonzero set is not a conflict, but both # simultaneously is, so that is the right thing to check for. if reduce(lambda a,b: a * b, con.nonzero, fastfrac(R, 1)).iszero(zero): return True return False def get_nonzero_set(R, assume): """Calculate a simple set of nonzero expressions""" zero = R.ideal(map(numerator, assume.zero)) nonzero = set() for nz in map(numerator, assume.nonzero): for (f,n) in nz.factor(): nonzero.add(f) rnz = zero.reduce(nz) for (f,n) in rnz.factor(): nonzero.add(f) return nonzero def prove_nonzero(R, exprs, assume): """Check whether an expression is provably nonzero, given assumptions""" zero = R.ideal(map(numerator, assume.zero)) nonzero = get_nonzero_set(R, assume) expl = set() ok = True for expr in exprs: if numerator(expr) in zero: return (False, [exprs[expr]]) allexprs = reduce(lambda a,b: numerator(a)*numerator(b), exprs, 1) for (f, n) in allexprs.factor(): if f not in nonzero: ok = False if ok: return (True, None) ok = True for (f, n) in zero.reduce(numerator(allexprs)).factor(): if f not in nonzero: ok = False if ok: return (True, None) ok = True for expr in exprs: for (f,n) in numerator(expr).factor(): if f not in nonzero: ok = False if ok: return (True, None) ok = True for expr in exprs: for (f,n) in zero.reduce(numerator(expr)).factor(): if f not in nonzero: expl.add(exprs[expr]) if expl: return (False, list(expl)) else: return (True, None) def prove_zero(R, exprs, assume): """Check whether all of the passed expressions are provably zero, given assumptions""" r, e = prove_nonzero(R, dict(map(lambda x: (fastfrac(R, x.bot, 1), exprs[x]), exprs)), assume) if not r: return (False, map(lambda x: "Possibly zero denominator: %s" % x, e)) zero = R.ideal(map(numerator, assume.zero)) nonzero = prod(x for x in assume.nonzero) expl = [] for expr in exprs: if not expr.iszero(zero): expl.append(exprs[expr]) if not expl: return (True, None) return (False, expl) def describe_extra(R, assume, assumeExtra): """Describe what assumptions are added, given existing assumptions""" zerox = assume.zero.copy() zerox.update(assumeExtra.zero) zero = R.ideal(map(numerator, assume.zero)) zeroextra = R.ideal(map(numerator, zerox)) nonzero = get_nonzero_set(R, assume) ret = set() # Iterate over the extra zero expressions for base in assumeExtra.zero: if base not in zero: add = [] for (f, n) in numerator(base).factor(): if f 
not in nonzero: add += ["%s" % f] if add: ret.add((" * ".join(add)) + " = 0 [%s]" % assumeExtra.zero[base]) # Iterate over the extra nonzero expressions for nz in assumeExtra.nonzero: nzr = zeroextra.reduce(numerator(nz)) if nzr not in zeroextra: for (f,n) in nzr.factor(): if zeroextra.reduce(f) not in nonzero: ret.add("%s != 0" % zeroextra.reduce(f)) return ", ".join(x for x in ret) def check_symbolic(R, assumeLaw, assumeAssert, assumeBranch, require): """Check a set of zero and nonzero requirements, given a set of zero and nonzero assumptions""" assume = assumeLaw + assumeAssert + assumeBranch if conflicts(R, assume): # This formula does not apply return None describe = describe_extra(R, assumeLaw + assumeBranch, assumeAssert) ok, msg = prove_zero(R, require.zero, assume) if not ok: return "FAIL, %s fails (assuming %s)" % (str(msg), describe) res, expl = prove_nonzero(R, require.nonzero, assume) if not res: return "FAIL, %s fails (assuming %s)" % (str(expl), describe) if describe != "": return "OK (assuming %s)" % describe else: return "OK" def concrete_verify(c): for k in c.zero: if k != 0: return (False, c.zero[k]) for k in c.nonzero: if k == 0: return (False, c.nonzero[k]) return (True, None) diff --git a/src/secp256k1/src/asm/field_10x26_arm.s b/src/secp256k1/src/asm/field_10x26_arm.s index bd2b629e1..5a9cc3ffc 100644 --- a/src/secp256k1/src/asm/field_10x26_arm.s +++ b/src/secp256k1/src/asm/field_10x26_arm.s @@ -1,919 +1,919 @@ @ vim: set tabstop=8 softtabstop=8 shiftwidth=8 noexpandtab syntax=armasm: /********************************************************************** * Copyright (c) 2014 Wladimir J. van der Laan * * Distributed under the MIT software license, see the accompanying * * file COPYING or http://www.opensource.org/licenses/mit-license.php.* **********************************************************************/ /* ARM implementation of field_10x26 inner loops. Note: - To avoid unnecessary loads and make use of available registers, two 'passes' have every time been interleaved, with the odd passes accumulating c' and d' which will be added to c and d respectively in the even passes */ .syntax unified .arch armv7-a @ eabi attributes - see readelf -A .eabi_attribute 8, 1 @ Tag_ARM_ISA_use = yes .eabi_attribute 9, 0 @ Tag_Thumb_ISA_use = no .eabi_attribute 10, 0 @ Tag_FP_arch = none .eabi_attribute 24, 1 @ Tag_ABI_align_needed = 8-byte .eabi_attribute 25, 1 @ Tag_ABI_align_preserved = 8-byte, except leaf SP - .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals = Agressive Speed + .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals = Aggressive Speed .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access = v6 .text @ Field constants .set field_R0, 0x3d10 .set field_R1, 0x400 .set field_not_M, 0xfc000000 @ ~M = ~0x3ffffff .align 2 .global secp256k1_fe_mul_inner .type secp256k1_fe_mul_inner, %function @ Arguments: @ r0 r Restrict: can overlap with a, not with b @ r1 a @ r2 b @ Stack (total 4+10*4 = 44) @ sp + #0 saved 'r' pointer @ sp + #4 + 4*X t0,t1,t2,t3,t4,t5,t6,t7,u8,t9 secp256k1_fe_mul_inner: stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14} sub sp, sp, #48 @ frame=44 + alignment str r0, [sp, #0] @ save result address, we need it only at the end /****************************************** * Main computation code. 
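 *
 * For reference, a Python sketch of what this function computes, assuming the
 * 10x26 limb representation (the asm below fuses this multiplication with the
 * carry/reduction schedule, and may produce an equivalent but non-canonical
 * limb encoding of the same field element):
 *
 *   def fe_mul_ref(a, b):                   # a, b: lists of 10 26-bit limbs
 *       p = 2**256 - 0x1000003D1            # the secp256k1 field prime
 *       n = sum(l << (26*i) for i, l in enumerate(a))
 *       m = sum(l << (26*i) for i, l in enumerate(b))
 *       r = (n * m) % p
 *       return [(r >> (26*i)) & 0x3ffffff for i in range(10)]
 *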
****************************************** Allocation: r0,r14,r7,r8 scratch r1 a (pointer) r2 b (pointer) r3:r4 c r5:r6 d r11:r12 c' r9:r10 d' Note: do not write to r[] here, it may overlap with a[] */ /* A - interleaved with B */ ldr r7, [r1, #0*4] @ a[0] ldr r8, [r2, #9*4] @ b[9] ldr r0, [r1, #1*4] @ a[1] umull r5, r6, r7, r8 @ d = a[0] * b[9] ldr r14, [r2, #8*4] @ b[8] umull r9, r10, r0, r8 @ d' = a[1] * b[9] ldr r7, [r1, #2*4] @ a[2] umlal r5, r6, r0, r14 @ d += a[1] * b[8] ldr r8, [r2, #7*4] @ b[7] umlal r9, r10, r7, r14 @ d' += a[2] * b[8] ldr r0, [r1, #3*4] @ a[3] umlal r5, r6, r7, r8 @ d += a[2] * b[7] ldr r14, [r2, #6*4] @ b[6] umlal r9, r10, r0, r8 @ d' += a[3] * b[7] ldr r7, [r1, #4*4] @ a[4] umlal r5, r6, r0, r14 @ d += a[3] * b[6] ldr r8, [r2, #5*4] @ b[5] umlal r9, r10, r7, r14 @ d' += a[4] * b[6] ldr r0, [r1, #5*4] @ a[5] umlal r5, r6, r7, r8 @ d += a[4] * b[5] ldr r14, [r2, #4*4] @ b[4] umlal r9, r10, r0, r8 @ d' += a[5] * b[5] ldr r7, [r1, #6*4] @ a[6] umlal r5, r6, r0, r14 @ d += a[5] * b[4] ldr r8, [r2, #3*4] @ b[3] umlal r9, r10, r7, r14 @ d' += a[6] * b[4] ldr r0, [r1, #7*4] @ a[7] umlal r5, r6, r7, r8 @ d += a[6] * b[3] ldr r14, [r2, #2*4] @ b[2] umlal r9, r10, r0, r8 @ d' += a[7] * b[3] ldr r7, [r1, #8*4] @ a[8] umlal r5, r6, r0, r14 @ d += a[7] * b[2] ldr r8, [r2, #1*4] @ b[1] umlal r9, r10, r7, r14 @ d' += a[8] * b[2] ldr r0, [r1, #9*4] @ a[9] umlal r5, r6, r7, r8 @ d += a[8] * b[1] ldr r14, [r2, #0*4] @ b[0] umlal r9, r10, r0, r8 @ d' += a[9] * b[1] ldr r7, [r1, #0*4] @ a[0] umlal r5, r6, r0, r14 @ d += a[9] * b[0] @ r7,r14 used in B bic r0, r5, field_not_M @ t9 = d & M str r0, [sp, #4 + 4*9] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 /* B */ umull r3, r4, r7, r14 @ c = a[0] * b[0] adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u0 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u0 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t0 = c & M str r14, [sp, #4 + 0*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u0 * R1 umlal r3, r4, r0, r14 /* C - interleaved with D */ ldr r7, [r1, #0*4] @ a[0] ldr r8, [r2, #2*4] @ b[2] ldr r14, [r2, #1*4] @ b[1] umull r11, r12, r7, r8 @ c' = a[0] * b[2] ldr r0, [r1, #1*4] @ a[1] umlal r3, r4, r7, r14 @ c += a[0] * b[1] ldr r8, [r2, #0*4] @ b[0] umlal r11, r12, r0, r14 @ c' += a[1] * b[1] ldr r7, [r1, #2*4] @ a[2] umlal r3, r4, r0, r8 @ c += a[1] * b[0] ldr r14, [r2, #9*4] @ b[9] umlal r11, r12, r7, r8 @ c' += a[2] * b[0] ldr r0, [r1, #3*4] @ a[3] umlal r5, r6, r7, r14 @ d += a[2] * b[9] ldr r8, [r2, #8*4] @ b[8] umull r9, r10, r0, r14 @ d' = a[3] * b[9] ldr r7, [r1, #4*4] @ a[4] umlal r5, r6, r0, r8 @ d += a[3] * b[8] ldr r14, [r2, #7*4] @ b[7] umlal r9, r10, r7, r8 @ d' += a[4] * b[8] ldr r0, [r1, #5*4] @ a[5] umlal r5, r6, r7, r14 @ d += a[4] * b[7] ldr r8, [r2, #6*4] @ b[6] umlal r9, r10, r0, r14 @ d' += a[5] * b[7] ldr r7, [r1, #6*4] @ a[6] umlal r5, r6, r0, r8 @ d += a[5] * b[6] ldr r14, [r2, #5*4] @ b[5] umlal r9, r10, r7, r8 @ d' += a[6] * b[6] ldr r0, [r1, #7*4] @ a[7] umlal r5, r6, r7, r14 @ d += a[6] * b[5] ldr r8, [r2, #4*4] @ b[4] umlal r9, r10, r0, r14 @ d' += a[7] * b[5] ldr r7, [r1, #8*4] @ a[8] umlal r5, r6, r0, r8 @ d += a[7] * b[4] ldr r14, [r2, #3*4] @ b[3] umlal r9, r10, r7, r8 @ d' += a[8] * b[4] ldr r0, [r1, #9*4] @ a[9] umlal r5, r6, r7, r14 @ d += a[8] * b[3] ldr r8, [r2, #2*4] @ b[2] umlal r9, r10, r0, r14 @ d' += a[9] * b[3] umlal r5, r6, 
r0, r8 @ d += a[9] * b[2] bic r0, r5, field_not_M @ u1 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u1 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t1 = c & M str r14, [sp, #4 + 1*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u1 * R1 umlal r3, r4, r0, r14 /* D */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u2 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u2 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t2 = c & M str r14, [sp, #4 + 2*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u2 * R1 umlal r3, r4, r0, r14 /* E - interleaved with F */ ldr r7, [r1, #0*4] @ a[0] ldr r8, [r2, #4*4] @ b[4] umull r11, r12, r7, r8 @ c' = a[0] * b[4] ldr r8, [r2, #3*4] @ b[3] umlal r3, r4, r7, r8 @ c += a[0] * b[3] ldr r7, [r1, #1*4] @ a[1] umlal r11, r12, r7, r8 @ c' += a[1] * b[3] ldr r8, [r2, #2*4] @ b[2] umlal r3, r4, r7, r8 @ c += a[1] * b[2] ldr r7, [r1, #2*4] @ a[2] umlal r11, r12, r7, r8 @ c' += a[2] * b[2] ldr r8, [r2, #1*4] @ b[1] umlal r3, r4, r7, r8 @ c += a[2] * b[1] ldr r7, [r1, #3*4] @ a[3] umlal r11, r12, r7, r8 @ c' += a[3] * b[1] ldr r8, [r2, #0*4] @ b[0] umlal r3, r4, r7, r8 @ c += a[3] * b[0] ldr r7, [r1, #4*4] @ a[4] umlal r11, r12, r7, r8 @ c' += a[4] * b[0] ldr r8, [r2, #9*4] @ b[9] umlal r5, r6, r7, r8 @ d += a[4] * b[9] ldr r7, [r1, #5*4] @ a[5] umull r9, r10, r7, r8 @ d' = a[5] * b[9] ldr r8, [r2, #8*4] @ b[8] umlal r5, r6, r7, r8 @ d += a[5] * b[8] ldr r7, [r1, #6*4] @ a[6] umlal r9, r10, r7, r8 @ d' += a[6] * b[8] ldr r8, [r2, #7*4] @ b[7] umlal r5, r6, r7, r8 @ d += a[6] * b[7] ldr r7, [r1, #7*4] @ a[7] umlal r9, r10, r7, r8 @ d' += a[7] * b[7] ldr r8, [r2, #6*4] @ b[6] umlal r5, r6, r7, r8 @ d += a[7] * b[6] ldr r7, [r1, #8*4] @ a[8] umlal r9, r10, r7, r8 @ d' += a[8] * b[6] ldr r8, [r2, #5*4] @ b[5] umlal r5, r6, r7, r8 @ d += a[8] * b[5] ldr r7, [r1, #9*4] @ a[9] umlal r9, r10, r7, r8 @ d' += a[9] * b[5] ldr r8, [r2, #4*4] @ b[4] umlal r5, r6, r7, r8 @ d += a[9] * b[4] bic r0, r5, field_not_M @ u3 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u3 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t3 = c & M str r14, [sp, #4 + 3*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u3 * R1 umlal r3, r4, r0, r14 /* F */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u4 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u4 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t4 = c & M str r14, [sp, #4 + 4*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u4 * R1 umlal r3, r4, r0, r14 /* G - interleaved with H */ ldr r7, [r1, #0*4] @ a[0] ldr r8, [r2, #6*4] @ b[6] ldr r14, [r2, #5*4] @ b[5] umull r11, r12, r7, r8 @ c' = a[0] * b[6] ldr r0, [r1, #1*4] @ a[1] umlal r3, r4, r7, r14 @ c += a[0] * b[5] ldr r8, [r2, #4*4] @ b[4] umlal r11, r12, r0, r14 @ c' += a[1] * b[5] ldr r7, [r1, #2*4] @ a[2] umlal r3, r4, r0, r8 @ c += a[1] * b[4] ldr r14, [r2, #3*4] @ b[3] umlal r11, r12, r7, r8 @ c' += a[2] * b[4] ldr r0, [r1, #3*4] @ a[3] umlal r3, r4, r7, r14 @ c += a[2] * b[3] ldr r8, [r2, #2*4] @ 
b[2] umlal r11, r12, r0, r14 @ c' += a[3] * b[3] ldr r7, [r1, #4*4] @ a[4] umlal r3, r4, r0, r8 @ c += a[3] * b[2] ldr r14, [r2, #1*4] @ b[1] umlal r11, r12, r7, r8 @ c' += a[4] * b[2] ldr r0, [r1, #5*4] @ a[5] umlal r3, r4, r7, r14 @ c += a[4] * b[1] ldr r8, [r2, #0*4] @ b[0] umlal r11, r12, r0, r14 @ c' += a[5] * b[1] ldr r7, [r1, #6*4] @ a[6] umlal r3, r4, r0, r8 @ c += a[5] * b[0] ldr r14, [r2, #9*4] @ b[9] umlal r11, r12, r7, r8 @ c' += a[6] * b[0] ldr r0, [r1, #7*4] @ a[7] umlal r5, r6, r7, r14 @ d += a[6] * b[9] ldr r8, [r2, #8*4] @ b[8] umull r9, r10, r0, r14 @ d' = a[7] * b[9] ldr r7, [r1, #8*4] @ a[8] umlal r5, r6, r0, r8 @ d += a[7] * b[8] ldr r14, [r2, #7*4] @ b[7] umlal r9, r10, r7, r8 @ d' += a[8] * b[8] ldr r0, [r1, #9*4] @ a[9] umlal r5, r6, r7, r14 @ d += a[8] * b[7] ldr r8, [r2, #6*4] @ b[6] umlal r9, r10, r0, r14 @ d' += a[9] * b[7] umlal r5, r6, r0, r8 @ d += a[9] * b[6] bic r0, r5, field_not_M @ u5 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u5 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t5 = c & M str r14, [sp, #4 + 5*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u5 * R1 umlal r3, r4, r0, r14 /* H */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u6 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u6 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t6 = c & M str r14, [sp, #4 + 6*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u6 * R1 umlal r3, r4, r0, r14 /* I - interleaved with J */ ldr r8, [r2, #8*4] @ b[8] ldr r7, [r1, #0*4] @ a[0] ldr r14, [r2, #7*4] @ b[7] umull r11, r12, r7, r8 @ c' = a[0] * b[8] ldr r0, [r1, #1*4] @ a[1] umlal r3, r4, r7, r14 @ c += a[0] * b[7] ldr r8, [r2, #6*4] @ b[6] umlal r11, r12, r0, r14 @ c' += a[1] * b[7] ldr r7, [r1, #2*4] @ a[2] umlal r3, r4, r0, r8 @ c += a[1] * b[6] ldr r14, [r2, #5*4] @ b[5] umlal r11, r12, r7, r8 @ c' += a[2] * b[6] ldr r0, [r1, #3*4] @ a[3] umlal r3, r4, r7, r14 @ c += a[2] * b[5] ldr r8, [r2, #4*4] @ b[4] umlal r11, r12, r0, r14 @ c' += a[3] * b[5] ldr r7, [r1, #4*4] @ a[4] umlal r3, r4, r0, r8 @ c += a[3] * b[4] ldr r14, [r2, #3*4] @ b[3] umlal r11, r12, r7, r8 @ c' += a[4] * b[4] ldr r0, [r1, #5*4] @ a[5] umlal r3, r4, r7, r14 @ c += a[4] * b[3] ldr r8, [r2, #2*4] @ b[2] umlal r11, r12, r0, r14 @ c' += a[5] * b[3] ldr r7, [r1, #6*4] @ a[6] umlal r3, r4, r0, r8 @ c += a[5] * b[2] ldr r14, [r2, #1*4] @ b[1] umlal r11, r12, r7, r8 @ c' += a[6] * b[2] ldr r0, [r1, #7*4] @ a[7] umlal r3, r4, r7, r14 @ c += a[6] * b[1] ldr r8, [r2, #0*4] @ b[0] umlal r11, r12, r0, r14 @ c' += a[7] * b[1] ldr r7, [r1, #8*4] @ a[8] umlal r3, r4, r0, r8 @ c += a[7] * b[0] ldr r14, [r2, #9*4] @ b[9] umlal r11, r12, r7, r8 @ c' += a[8] * b[0] ldr r0, [r1, #9*4] @ a[9] umlal r5, r6, r7, r14 @ d += a[8] * b[9] ldr r8, [r2, #8*4] @ b[8] umull r9, r10, r0, r14 @ d' = a[9] * b[9] umlal r5, r6, r0, r8 @ d += a[9] * b[8] bic r0, r5, field_not_M @ u7 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u7 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t7 = c & M str r14, [sp, #4 + 7*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u7 * R1 umlal r3, r4, r0, r14 /* J */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 
@ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u8 = d & M str r0, [sp, #4 + 8*4] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u8 * R0 umlal r3, r4, r0, r14 /****************************************** * compute and write back result ****************************************** Allocation: r0 r r3:r4 c r5:r6 d r7 t0 r8 t1 r9 t2 r11 u8 r12 t9 r1,r2,r10,r14 scratch Note: do not read from a[] after here, it may overlap with r[] */ ldr r0, [sp, #0] add r1, sp, #4 + 3*4 @ r[3..7] = t3..7, r11=u8, r12=t9 ldmia r1, {r2,r7,r8,r9,r10,r11,r12} add r1, r0, #3*4 stmia r1, {r2,r7,r8,r9,r10} bic r2, r3, field_not_M @ r[8] = c & M str r2, [r0, #8*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u8 * R1 umlal r3, r4, r11, r14 movw r14, field_R0 @ c += d * R0 umlal r3, r4, r5, r14 adds r3, r3, r12 @ c += t9 adc r4, r4, #0 add r1, sp, #4 + 0*4 @ r7,r8,r9 = t0,t1,t2 ldmia r1, {r7,r8,r9} ubfx r2, r3, #0, #22 @ r[9] = c & (M >> 4) str r2, [r0, #9*4] mov r3, r3, lsr #22 @ c >>= 22 orr r3, r3, r4, asl #10 mov r4, r4, lsr #22 movw r14, field_R1 << 4 @ c += d * (R1 << 4) umlal r3, r4, r5, r14 movw r14, field_R0 >> 4 @ d = c * (R0 >> 4) + t0 (64x64 multiply+add) umull r5, r6, r3, r14 @ d = c.lo * (R0 >> 4) adds r5, r5, r7 @ d.lo += t0 mla r6, r14, r4, r6 @ d.hi += c.hi * (R0 >> 4) adc r6, r6, 0 @ d.hi += carry bic r2, r5, field_not_M @ r[0] = d & M str r2, [r0, #0*4] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R1 >> 4 @ d += c * (R1 >> 4) + t1 (64x64 multiply+add) umull r1, r2, r3, r14 @ tmp = c.lo * (R1 >> 4) adds r5, r5, r8 @ d.lo += t1 adc r6, r6, #0 @ d.hi += carry adds r5, r5, r1 @ d.lo += tmp.lo mla r2, r14, r4, r2 @ tmp.hi += c.hi * (R1 >> 4) adc r6, r6, r2 @ d.hi += carry + tmp.hi bic r2, r5, field_not_M @ r[1] = d & M str r2, [r0, #1*4] mov r5, r5, lsr #26 @ d >>= 26 (ignore hi) orr r5, r5, r6, asl #6 add r5, r5, r9 @ d += t2 str r5, [r0, #2*4] @ r[2] = d add sp, sp, #48 ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size secp256k1_fe_mul_inner, .-secp256k1_fe_mul_inner .align 2 .global secp256k1_fe_sqr_inner .type secp256k1_fe_sqr_inner, %function @ Arguments: @ r0 r Can overlap with a @ r1 a @ Stack (total 4+10*4 = 44) @ sp + #0 saved 'r' pointer @ sp + #4 + 4*X t0,t1,t2,t3,t4,t5,t6,t7,u8,t9 secp256k1_fe_sqr_inner: stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14} sub sp, sp, #48 @ frame=44 + alignment str r0, [sp, #0] @ save result address, we need it only at the end /****************************************** * Main computation code. 
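 *
 * For reference, a Python sketch of the squaring schedule's partial products,
 * under the same 10x26 assumptions as the fe_mul sketch above: a square needs
 * only about half the multiplies, because cross terms pair up; this is why the
 * code below forms a[i]*2 once and reuses it against the remaining limbs:
 *
 *   def fe_sqr_partials(a):                 # a: list of 10 26-bit limbs
 *       acc = [0]*19
 *       for i in range(10):
 *           acc[2*i] += a[i]*a[i]           # diagonal terms
 *           for j in range(i+1, 10):
 *               acc[i+j] += (2*a[i])*a[j]   # folded symmetric cross terms
 *       return acc                          # the weight-2^(26*k) sums reduced below
 *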
****************************************** Allocation: r0,r14,r2,r7,r8 scratch r1 a (pointer) r3:r4 c r5:r6 d r11:r12 c' r9:r10 d' Note: do not write to r[] here, it may overlap with a[] */ /* A interleaved with B */ ldr r0, [r1, #1*4] @ a[1]*2 ldr r7, [r1, #0*4] @ a[0] mov r0, r0, asl #1 ldr r14, [r1, #9*4] @ a[9] umull r3, r4, r7, r7 @ c = a[0] * a[0] ldr r8, [r1, #8*4] @ a[8] mov r7, r7, asl #1 umull r5, r6, r7, r14 @ d = a[0]*2 * a[9] ldr r7, [r1, #2*4] @ a[2]*2 umull r9, r10, r0, r14 @ d' = a[1]*2 * a[9] ldr r14, [r1, #7*4] @ a[7] umlal r5, r6, r0, r8 @ d += a[1]*2 * a[8] mov r7, r7, asl #1 ldr r0, [r1, #3*4] @ a[3]*2 umlal r9, r10, r7, r8 @ d' += a[2]*2 * a[8] ldr r8, [r1, #6*4] @ a[6] umlal r5, r6, r7, r14 @ d += a[2]*2 * a[7] mov r0, r0, asl #1 ldr r7, [r1, #4*4] @ a[4]*2 umlal r9, r10, r0, r14 @ d' += a[3]*2 * a[7] ldr r14, [r1, #5*4] @ a[5] mov r7, r7, asl #1 umlal r5, r6, r0, r8 @ d += a[3]*2 * a[6] umlal r9, r10, r7, r8 @ d' += a[4]*2 * a[6] umlal r5, r6, r7, r14 @ d += a[4]*2 * a[5] umlal r9, r10, r14, r14 @ d' += a[5] * a[5] bic r0, r5, field_not_M @ t9 = d & M str r0, [sp, #4 + 9*4] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 /* B */ adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u0 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u0 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t0 = c & M str r14, [sp, #4 + 0*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u0 * R1 umlal r3, r4, r0, r14 /* C interleaved with D */ ldr r0, [r1, #0*4] @ a[0]*2 ldr r14, [r1, #1*4] @ a[1] mov r0, r0, asl #1 ldr r8, [r1, #2*4] @ a[2] umlal r3, r4, r0, r14 @ c += a[0]*2 * a[1] mov r7, r8, asl #1 @ a[2]*2 umull r11, r12, r14, r14 @ c' = a[1] * a[1] ldr r14, [r1, #9*4] @ a[9] umlal r11, r12, r0, r8 @ c' += a[0]*2 * a[2] ldr r0, [r1, #3*4] @ a[3]*2 ldr r8, [r1, #8*4] @ a[8] umlal r5, r6, r7, r14 @ d += a[2]*2 * a[9] mov r0, r0, asl #1 ldr r7, [r1, #4*4] @ a[4]*2 umull r9, r10, r0, r14 @ d' = a[3]*2 * a[9] ldr r14, [r1, #7*4] @ a[7] umlal r5, r6, r0, r8 @ d += a[3]*2 * a[8] mov r7, r7, asl #1 ldr r0, [r1, #5*4] @ a[5]*2 umlal r9, r10, r7, r8 @ d' += a[4]*2 * a[8] ldr r8, [r1, #6*4] @ a[6] mov r0, r0, asl #1 umlal r5, r6, r7, r14 @ d += a[4]*2 * a[7] umlal r9, r10, r0, r14 @ d' += a[5]*2 * a[7] umlal r5, r6, r0, r8 @ d += a[5]*2 * a[6] umlal r9, r10, r8, r8 @ d' += a[6] * a[6] bic r0, r5, field_not_M @ u1 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u1 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t1 = c & M str r14, [sp, #4 + 1*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u1 * R1 umlal r3, r4, r0, r14 /* D */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u2 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u2 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t2 = c & M str r14, [sp, #4 + 2*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u2 * R1 umlal r3, r4, r0, r14 /* E interleaved with F */ ldr r7, [r1, #0*4] @ a[0]*2 ldr r0, [r1, #1*4] @ a[1]*2 ldr r14, [r1, #2*4] @ a[2] mov r7, r7, asl #1 ldr r8, [r1, #3*4] @ a[3] ldr r2, [r1, #4*4] umlal r3, r4, r7, r8 @ c += a[0]*2 * a[3] mov r0, r0, asl #1 umull r11, r12, r7, r2 @ c' = 
a[0]*2 * a[4] mov r2, r2, asl #1 @ a[4]*2 umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[3] ldr r8, [r1, #9*4] @ a[9] umlal r3, r4, r0, r14 @ c += a[1]*2 * a[2] ldr r0, [r1, #5*4] @ a[5]*2 umlal r11, r12, r14, r14 @ c' += a[2] * a[2] ldr r14, [r1, #8*4] @ a[8] mov r0, r0, asl #1 umlal r5, r6, r2, r8 @ d += a[4]*2 * a[9] ldr r7, [r1, #6*4] @ a[6]*2 umull r9, r10, r0, r8 @ d' = a[5]*2 * a[9] mov r7, r7, asl #1 ldr r8, [r1, #7*4] @ a[7] umlal r5, r6, r0, r14 @ d += a[5]*2 * a[8] umlal r9, r10, r7, r14 @ d' += a[6]*2 * a[8] umlal r5, r6, r7, r8 @ d += a[6]*2 * a[7] umlal r9, r10, r8, r8 @ d' += a[7] * a[7] bic r0, r5, field_not_M @ u3 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u3 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t3 = c & M str r14, [sp, #4 + 3*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u3 * R1 umlal r3, r4, r0, r14 /* F */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u4 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u4 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t4 = c & M str r14, [sp, #4 + 4*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u4 * R1 umlal r3, r4, r0, r14 /* G interleaved with H */ ldr r7, [r1, #0*4] @ a[0]*2 ldr r0, [r1, #1*4] @ a[1]*2 mov r7, r7, asl #1 ldr r8, [r1, #5*4] @ a[5] ldr r2, [r1, #6*4] @ a[6] umlal r3, r4, r7, r8 @ c += a[0]*2 * a[5] ldr r14, [r1, #4*4] @ a[4] mov r0, r0, asl #1 umull r11, r12, r7, r2 @ c' = a[0]*2 * a[6] ldr r7, [r1, #2*4] @ a[2]*2 umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[5] mov r7, r7, asl #1 ldr r8, [r1, #3*4] @ a[3] umlal r3, r4, r0, r14 @ c += a[1]*2 * a[4] mov r0, r2, asl #1 @ a[6]*2 umlal r11, r12, r7, r14 @ c' += a[2]*2 * a[4] ldr r14, [r1, #9*4] @ a[9] umlal r3, r4, r7, r8 @ c += a[2]*2 * a[3] ldr r7, [r1, #7*4] @ a[7]*2 umlal r11, r12, r8, r8 @ c' += a[3] * a[3] mov r7, r7, asl #1 ldr r8, [r1, #8*4] @ a[8] umlal r5, r6, r0, r14 @ d += a[6]*2 * a[9] umull r9, r10, r7, r14 @ d' = a[7]*2 * a[9] umlal r5, r6, r7, r8 @ d += a[7]*2 * a[8] umlal r9, r10, r8, r8 @ d' += a[8] * a[8] bic r0, r5, field_not_M @ u5 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u5 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t5 = c & M str r14, [sp, #4 + 5*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u5 * R1 umlal r3, r4, r0, r14 /* H */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 adds r5, r5, r9 @ d += d' adc r6, r6, r10 bic r0, r5, field_not_M @ u6 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u6 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t6 = c & M str r14, [sp, #4 + 6*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u6 * R1 umlal r3, r4, r0, r14 /* I interleaved with J */ ldr r7, [r1, #0*4] @ a[0]*2 ldr r0, [r1, #1*4] @ a[1]*2 mov r7, r7, asl #1 ldr r8, [r1, #7*4] @ a[7] ldr r2, [r1, #8*4] @ a[8] umlal r3, r4, r7, r8 @ c += a[0]*2 * a[7] ldr r14, [r1, #6*4] @ a[6] mov r0, r0, asl #1 umull r11, r12, r7, r2 @ c' = a[0]*2 * a[8] ldr r7, [r1, #2*4] @ a[2]*2 umlal r11, r12, r0, r8 @ c' += a[1]*2 * a[7] ldr r8, [r1, #5*4] @ a[5] umlal r3, r4, r0, r14 @ c += a[1]*2 * a[6] ldr r0, [r1, #3*4] @ 
a[3]*2 mov r7, r7, asl #1 umlal r11, r12, r7, r14 @ c' += a[2]*2 * a[6] ldr r14, [r1, #4*4] @ a[4] mov r0, r0, asl #1 umlal r3, r4, r7, r8 @ c += a[2]*2 * a[5] mov r2, r2, asl #1 @ a[8]*2 umlal r11, r12, r0, r8 @ c' += a[3]*2 * a[5] umlal r3, r4, r0, r14 @ c += a[3]*2 * a[4] umlal r11, r12, r14, r14 @ c' += a[4] * a[4] ldr r8, [r1, #9*4] @ a[9] umlal r5, r6, r2, r8 @ d += a[8]*2 * a[9] @ r8 will be used in J bic r0, r5, field_not_M @ u7 = d & M mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u7 * R0 umlal r3, r4, r0, r14 bic r14, r3, field_not_M @ t7 = c & M str r14, [sp, #4 + 7*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u7 * R1 umlal r3, r4, r0, r14 /* J */ adds r3, r3, r11 @ c += c' adc r4, r4, r12 umlal r5, r6, r8, r8 @ d += a[9] * a[9] bic r0, r5, field_not_M @ u8 = d & M str r0, [sp, #4 + 8*4] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R0 @ c += u8 * R0 umlal r3, r4, r0, r14 /****************************************** * compute and write back result ****************************************** Allocation: r0 r r3:r4 c r5:r6 d r7 t0 r8 t1 r9 t2 r11 u8 r12 t9 r1,r2,r10,r14 scratch Note: do not read from a[] after here, it may overlap with r[] */ ldr r0, [sp, #0] add r1, sp, #4 + 3*4 @ r[3..7] = t3..7, r11=u8, r12=t9 ldmia r1, {r2,r7,r8,r9,r10,r11,r12} add r1, r0, #3*4 stmia r1, {r2,r7,r8,r9,r10} bic r2, r3, field_not_M @ r[8] = c & M str r2, [r0, #8*4] mov r3, r3, lsr #26 @ c >>= 26 orr r3, r3, r4, asl #6 mov r4, r4, lsr #26 mov r14, field_R1 @ c += u8 * R1 umlal r3, r4, r11, r14 movw r14, field_R0 @ c += d * R0 umlal r3, r4, r5, r14 adds r3, r3, r12 @ c += t9 adc r4, r4, #0 add r1, sp, #4 + 0*4 @ r7,r8,r9 = t0,t1,t2 ldmia r1, {r7,r8,r9} ubfx r2, r3, #0, #22 @ r[9] = c & (M >> 4) str r2, [r0, #9*4] mov r3, r3, lsr #22 @ c >>= 22 orr r3, r3, r4, asl #10 mov r4, r4, lsr #22 movw r14, field_R1 << 4 @ c += d * (R1 << 4) umlal r3, r4, r5, r14 movw r14, field_R0 >> 4 @ d = c * (R0 >> 4) + t0 (64x64 multiply+add) umull r5, r6, r3, r14 @ d = c.lo * (R0 >> 4) adds r5, r5, r7 @ d.lo += t0 mla r6, r14, r4, r6 @ d.hi += c.hi * (R0 >> 4) adc r6, r6, 0 @ d.hi += carry bic r2, r5, field_not_M @ r[0] = d & M str r2, [r0, #0*4] mov r5, r5, lsr #26 @ d >>= 26 orr r5, r5, r6, asl #6 mov r6, r6, lsr #26 movw r14, field_R1 >> 4 @ d += c * (R1 >> 4) + t1 (64x64 multiply+add) umull r1, r2, r3, r14 @ tmp = c.lo * (R1 >> 4) adds r5, r5, r8 @ d.lo += t1 adc r6, r6, #0 @ d.hi += carry adds r5, r5, r1 @ d.lo += tmp.lo mla r2, r14, r4, r2 @ tmp.hi += c.hi * (R1 >> 4) adc r6, r6, r2 @ d.hi += carry + tmp.hi bic r2, r5, field_not_M @ r[1] = d & M str r2, [r0, #1*4] mov r5, r5, lsr #26 @ d >>= 26 (ignore hi) orr r5, r5, r6, asl #6 add r5, r5, r9 @ d += t2 str r5, [r0, #2*4] @ r[2] = d add sp, sp, #48 ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc} .size secp256k1_fe_sqr_inner, .-secp256k1_fe_sqr_inner
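
Note on the reduction constants used by both functions above: for the
secp256k1 prime p = 2^256 - 0x1000003D1, the file's field_R0 and field_R1
satisfy 2^260 == field_R1*2^26 + field_R0 (mod p). A quick Python check (the
names here are local variables mirroring the .set constants):

    p = 2**256 - 0x1000003D1
    R0, R1 = 0x3D10, 0x400
    assert ((R1 << 26) + R0) % p == pow(2, 260, p)

This identity is why a carry limb u extracted at weight 2^260 is folded back
into the accumulator as u * R0 at the current limb and as u * R1 one limb
(26 bits) higher.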