Changeset View
Changeset View
Standalone View
Standalone View
src/crypto/sha256_avx2.cpp
#ifdef ENABLE_AVX2 | #ifdef ENABLE_AVX2 | ||||
#include <stdint.h> | |||||
#if defined(_MSC_VER) | |||||
#include <immintrin.h> | #include <immintrin.h> | ||||
#elif defined(__GNUC__) | #include <stdint.h> | ||||
#include <x86intrin.h> | |||||
#endif | |||||
#include "crypto/common.h" | #include "crypto/common.h" | ||||
#include "crypto/sha256.h" | #include "crypto/sha256.h" | ||||
namespace sha256d64_avx2 { | namespace sha256d64_avx2 { | ||||
namespace { | namespace { | ||||
/** Return a 256-bit vector with the 32-bit value x replicated in all eight lanes. */
__m256i inline K(uint32_t x) { return _mm256_set1_epi32(x); } | __m256i inline K(uint32_t x) { return _mm256_set1_epi32(x); } | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | __m256i inline sigma0(__m256i x) { | ||||
ShR(x, 3)); | ShR(x, 3)); | ||||
} | } | ||||
/**
 * Combine three shifted views of x with Xor: (x >> 17 | x << 15),
 * (x >> 19 | x << 13) and (x >> 10).
 * NOTE(review): Xor/Or/ShR/ShL are defined in a part of the file that is
 * collapsed in this view. If they are per-32-bit-lane logical operations,
 * this is rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10), i.e. the SHA-256
 * message-schedule function sigma1 -- confirm against their definitions.
 */
__m256i inline sigma1(__m256i x) { | __m256i inline sigma1(__m256i x) { | ||||
return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), | return Xor(Or(ShR(x, 17), ShL(x, 15)), Or(ShR(x, 19), ShL(x, 13)), | ||||
ShR(x, 10)); | ShR(x, 10)); | ||||
} | } | ||||
/** One round of SHA-256. */ | /** One round of SHA-256. */ | ||||
/**
 * Only d and h are passed by reference and modified; a, b, c, e, f, g and k
 * are read-only inputs.
 *   t1 = h + Sigma1(e) + Ch(e, f, g) + k
 *   t2 = Sigma0(a) + Maj(a, b, c)
 *   d  = d + t1
 *   h  = t1 + t2
 * NOTE(review): k is presumably the round constant already combined with the
 * message-schedule word by the caller -- confirm at the call sites.
 * Add, Sigma0, Sigma1, Ch and Maj are defined elsewhere in this file.
 */
void inline __attribute__((always_inline)) | inline void __attribute__((always_inline)) | ||||
Round(__m256i a, __m256i b, __m256i c, __m256i &d, __m256i e, __m256i f, | Round(__m256i a, __m256i b, __m256i c, __m256i &d, __m256i e, __m256i f, | ||||
__m256i g, __m256i &h, __m256i k) { | __m256i g, __m256i &h, __m256i k) { | ||||
__m256i t1 = Add(h, Sigma1(e), Ch(e, f, g), k); | __m256i t1 = Add(h, Sigma1(e), Ch(e, f, g), k); | ||||
__m256i t2 = Add(Sigma0(a), Maj(a, b, c)); | __m256i t2 = Add(Sigma0(a), Maj(a, b, c)); | ||||
d = Add(d, t1); | d = Add(d, t1); | ||||
h = Add(t1, t2); | h = Add(t1, t2); | ||||
} | } | ||||
/**
 * Gather eight 32-bit words, one from each address chunk + 64*i + offset
 * (i = 0..7), into a single 256-bit vector and byte-swap each word.
 *
 * _mm256_set_epi32 takes its arguments from the highest lane to the lowest,
 * so the word read at chunk + 0 + offset ends up in lane 7 and the word at
 * chunk + 448 + offset in lane 0.
 */
__m256i inline Read8(const uint8_t *chunk, int offset) { | __m256i inline Read8(const uint8_t *chunk, int offset) { | ||||
__m256i ret = _mm256_set_epi32( | __m256i ret = _mm256_set_epi32( | ||||
ReadLE32(chunk + 0 + offset), ReadLE32(chunk + 64 + offset), | ReadLE32(chunk + 0 + offset), ReadLE32(chunk + 64 + offset), | ||||
ReadLE32(chunk + 128 + offset), ReadLE32(chunk + 192 + offset), | ReadLE32(chunk + 128 + offset), ReadLE32(chunk + 192 + offset), | ||||
ReadLE32(chunk + 256 + offset), ReadLE32(chunk + 320 + offset), | ReadLE32(chunk + 256 + offset), ReadLE32(chunk + 320 + offset), | ||||
ReadLE32(chunk + 384 + offset), ReadLE32(chunk + 448 + offset)); | ReadLE32(chunk + 384 + offset), ReadLE32(chunk + 448 + offset)); | ||||
/* The shuffle mask reverses the four bytes inside every 32-bit word (the same pattern is applied to both 128-bit halves). */
return _mm256_shuffle_epi8( | return _mm256_shuffle_epi8( | ||||
ret, _mm256_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, | ret, _mm256_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, | ||||
0x00010203UL, 0x0C0D0E0FUL, 0x08090A0BUL, | 0x00010203UL, 0x0C0D0E0FUL, 0x08090A0BUL, | ||||
0x04050607UL, 0x00010203UL)); | 0x04050607UL, 0x00010203UL)); | ||||
} | } | ||||
void inline Write8(uint8_t *out, int offset, __m256i v) { | inline void Write8(uint8_t *out, int offset, __m256i v) { | ||||
v = _mm256_shuffle_epi8( | v = _mm256_shuffle_epi8( | ||||
v, _mm256_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, | v, _mm256_set_epi32(0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, | ||||
0x00010203UL, 0x0C0D0E0FUL, 0x08090A0BUL, | 0x00010203UL, 0x0C0D0E0FUL, 0x08090A0BUL, | ||||
0x04050607UL, 0x00010203UL)); | 0x04050607UL, 0x00010203UL)); | ||||
WriteLE32(out + 0 + offset, _mm256_extract_epi32(v, 7)); | WriteLE32(out + 0 + offset, _mm256_extract_epi32(v, 7)); | ||||
WriteLE32(out + 32 + offset, _mm256_extract_epi32(v, 6)); | WriteLE32(out + 32 + offset, _mm256_extract_epi32(v, 6)); | ||||
WriteLE32(out + 64 + offset, _mm256_extract_epi32(v, 5)); | WriteLE32(out + 64 + offset, _mm256_extract_epi32(v, 5)); | ||||
WriteLE32(out + 96 + offset, _mm256_extract_epi32(v, 4)); | WriteLE32(out + 96 + offset, _mm256_extract_epi32(v, 4)); | ||||
▲ Show 20 Lines • Show All 356 Lines • Show Last 20 Lines |