Changeset View
Changeset View
Standalone View
Standalone View
src/crypto/sha256_sse41.cpp
#ifdef ENABLE_SSE41 | #ifdef ENABLE_SSE41 | ||||
#include <stdint.h> | |||||
#if defined(_MSC_VER) | |||||
#include <immintrin.h> | #include <immintrin.h> | ||||
#elif defined(__GNUC__) | #include <stdint.h> | ||||
#include <x86intrin.h> | |||||
#endif | |||||
#include "crypto/common.h" | #include "crypto/common.h" | ||||
#include "crypto/sha256.h" | #include "crypto/sha256.h" | ||||
namespace sha256d64_sse41 { | namespace sha256d64_sse41 { | ||||
namespace { | namespace { | ||||
__m128i inline K(uint32_t x) { return _mm_set1_epi32(x); } | __m128i inline K(uint32_t x) { return _mm_set1_epi32(x); } | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | __m128i inline sigma0(__m128i x) { | ||||
ShR(x, 3)); | ShR(x, 3)); | ||||
} | } | ||||
__m128i inline sigma1(__m128i x) { | __m128i inline sigma1(__m128i x) { | ||||
return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), | return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), | ||||
ShR(x, 10)); | ShR(x, 10)); | ||||
} | } | ||||
/** One round of SHA-256. */ | /** One round of SHA-256. */ | ||||
void inline __attribute__((always_inline)) | inline void __attribute__((always_inline)) | ||||
Round(__m128i a, __m128i b, __m128i c, __m128i &d, __m128i e, __m128i f, | Round(__m128i a, __m128i b, __m128i c, __m128i &d, __m128i e, __m128i f, | ||||
__m128i g, __m128i &h, __m128i k) { | __m128i g, __m128i &h, __m128i k) { | ||||
__m128i t1 = Add(h, Sigma1(e), Ch(e, f, g), k); | __m128i t1 = Add(h, Sigma1(e), Ch(e, f, g), k); | ||||
__m128i t2 = Add(Sigma0(a), Maj(a, b, c)); | __m128i t2 = Add(Sigma0(a), Maj(a, b, c)); | ||||
d = Add(d, t1); | d = Add(d, t1); | ||||
h = Add(t1, t2); | h = Add(t1, t2); | ||||
} | } | ||||
__m128i inline Read4(const uint8_t *chunk, int offset) { | __m128i inline Read4(const uint8_t *chunk, int offset) { | ||||
__m128i ret = _mm_set_epi32( | __m128i ret = _mm_set_epi32( | ||||
ReadLE32(chunk + 0 + offset), ReadLE32(chunk + 64 + offset), | ReadLE32(chunk + 0 + offset), ReadLE32(chunk + 64 + offset), | ||||
ReadLE32(chunk + 128 + offset), ReadLE32(chunk + 192 + offset)); | ReadLE32(chunk + 128 + offset), ReadLE32(chunk + 192 + offset)); | ||||
return _mm_shuffle_epi8(ret, _mm_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, | return _mm_shuffle_epi8(ret, _mm_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, | ||||
0x04050607UL, 0x00010203UL)); | 0x04050607UL, 0x00010203UL)); | ||||
} | } | ||||
void inline Write4(uint8_t *out, int offset, __m128i v) { | inline void Write4(uint8_t *out, int offset, __m128i v) { | ||||
v = _mm_shuffle_epi8(v, _mm_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, | v = _mm_shuffle_epi8(v, _mm_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, | ||||
0x04050607UL, 0x00010203UL)); | 0x04050607UL, 0x00010203UL)); | ||||
WriteLE32(out + 0 + offset, _mm_extract_epi32(v, 3)); | WriteLE32(out + 0 + offset, _mm_extract_epi32(v, 3)); | ||||
WriteLE32(out + 32 + offset, _mm_extract_epi32(v, 2)); | WriteLE32(out + 32 + offset, _mm_extract_epi32(v, 2)); | ||||
WriteLE32(out + 64 + offset, _mm_extract_epi32(v, 1)); | WriteLE32(out + 64 + offset, _mm_extract_epi32(v, 1)); | ||||
WriteLE32(out + 96 + offset, _mm_extract_epi32(v, 0)); | WriteLE32(out + 96 + offset, _mm_extract_epi32(v, 0)); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 350 Lines • Show Last 20 Lines |