Changeset View
Changeset View
Standalone View
Standalone View
src/crypto/sha256_shani.cpp
Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | |||||
inline void __attribute__((always_inline)) Unshuffle(__m128i &s0, __m128i &s1) { | inline void __attribute__((always_inline)) Unshuffle(__m128i &s0, __m128i &s1) { | ||||
const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B); | const __m128i t1 = _mm_shuffle_epi32(s0, 0x1B); | ||||
const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1); | const __m128i t2 = _mm_shuffle_epi32(s1, 0xB1); | ||||
s0 = _mm_blend_epi16(t1, t2, 0xF0); | s0 = _mm_blend_epi16(t1, t2, 0xF0); | ||||
s1 = _mm_alignr_epi8(t2, t1, 0x08); | s1 = _mm_alignr_epi8(t2, t1, 0x08); | ||||
} | } | ||||
__m128i inline __attribute__((always_inline)) Load(const unsigned char *in) { | __m128i inline __attribute__((always_inline)) Load(const uint8_t *in) { | ||||
return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)in), MASK); | return _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)in), MASK); | ||||
} | } | ||||
inline void __attribute__((always_inline)) Save(unsigned char *out, __m128i s) { | inline void __attribute__((always_inline)) Save(uint8_t *out, __m128i s) { | ||||
_mm_storeu_si128((__m128i *)out, _mm_shuffle_epi8(s, MASK)); | _mm_storeu_si128((__m128i *)out, _mm_shuffle_epi8(s, MASK)); | ||||
} | } | ||||
} | } | ||||
namespace sha256_shani { | namespace sha256_shani { | ||||
void Transform(uint32_t *s, const unsigned char *chunk, size_t blocks) { | void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks) { | ||||
__m128i m0, m1, m2, m3, s0, s1, so0, so1; | __m128i m0, m1, m2, m3, s0, s1, so0, so1; | ||||
/* Load state */ | /* Load state */ | ||||
s0 = _mm_loadu_si128((const __m128i *)s); | s0 = _mm_loadu_si128((const __m128i *)s); | ||||
s1 = _mm_loadu_si128((const __m128i *)(s + 4)); | s1 = _mm_loadu_si128((const __m128i *)(s + 4)); | ||||
Shuffle(s0, s1); | Shuffle(s0, s1); | ||||
while (blocks--) { | while (blocks--) { | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks) { | ||||
Unshuffle(s0, s1); | Unshuffle(s0, s1); | ||||
_mm_storeu_si128((__m128i *)s, s0); | _mm_storeu_si128((__m128i *)s, s0); | ||||
_mm_storeu_si128((__m128i *)(s + 4), s1); | _mm_storeu_si128((__m128i *)(s + 4), s1); | ||||
} | } | ||||
} | } | ||||
namespace sha256d64_shani { | namespace sha256d64_shani { | ||||
void Transform_2way(unsigned char *out, const unsigned char *in) { | void Transform_2way(uint8_t *out, const uint8_t *in) { | ||||
__m128i am0, am1, am2, am3, as0, as1, aso0, aso1; | __m128i am0, am1, am2, am3, as0, as1, aso0, aso1; | ||||
__m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1; | __m128i bm0, bm1, bm2, bm3, bs0, bs1, bso0, bso1; | ||||
/* Transform 1 */ | /* Transform 1 */ | ||||
bs0 = as0 = INIT0; | bs0 = as0 = INIT0; | ||||
bs1 = as1 = INIT1; | bs1 = as1 = INIT1; | ||||
am0 = Load(in); | am0 = Load(in); | ||||
bm0 = Load(in + 64); | bm0 = Load(in + 64); | ||||
▲ Show 20 Lines • Show All 202 Lines • Show Last 20 Lines |