diff --git a/src/random.h b/src/random.h --- a/src/random.h +++ b/src/random.h @@ -25,7 +25,7 @@ * - A stack pointer (indirectly committing to calling thread and call stack) * - A high-precision timestamp (rdtsc when available, c++ * high_resolution_clock otherwise) - * - Hardware RNG (rdrand) when available. + * - 64 bits from the hardware RNG (rdrand) when available. * These entropy sources are very fast, and only designed to protect against * situations where a VM state restore/copy results in multiple systems with the * same randomness. FastRandomContext on the other hand does not protect against @@ -52,6 +52,7 @@ * * On first use of the RNG (regardless of what function is called first), all * entropy sources used in the 'slow' seeder are included, but also: + * - 256 bits from the hardware RNG (rdseed or rdrand) when available. * - (On Windows) Performance monitoring data from the OS. * - (On Windows) Through OpenSSL, the screen contents. * diff --git a/src/random.cpp b/src/random.cpp --- a/src/random.cpp +++ b/src/random.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include @@ -79,24 +78,147 @@ } #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__) -static bool rdrand_supported = false; +static bool g_rdrand_supported = false; +static bool g_rdseed_supported = false; static constexpr uint32_t CPUID_F1_ECX_RDRAND = 0x40000000; +static constexpr uint32_t CPUID_F7_EBX_RDSEED = 0x00040000; +#ifdef bit_RDRND +static_assert(CPUID_F1_ECX_RDRAND == bit_RDRND, + "Unexpected value for bit_RDRND"); +#endif +#ifdef bit_RDSEED +static_assert(CPUID_F7_EBX_RDSEED == bit_RDSEED, + "Unexpected value for bit_RDSEED"); +#endif +static void inline GetCPUID(uint32_t leaf, uint32_t subleaf, uint32_t &a, + uint32_t &b, uint32_t &c, uint32_t &d) { + // We can't use __get_cpuid as it doesn't support subleafs. +#ifdef __GNUC__ + __cpuid_count(leaf, subleaf, a, b, c, d); +#else + __asm__("cpuid" + : "=a"(a), "=b"(b), "=c"(c), "=d"(d) + : "0"(leaf), "2"(subleaf)); +#endif +} + static void InitHardwareRand() { uint32_t eax, ebx, ecx, edx; - if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & CPUID_F1_ECX_RDRAND)) { - rdrand_supported = true; + GetCPUID(1, 0, eax, ebx, ecx, edx); + if (ecx & CPUID_F1_ECX_RDRAND) { + g_rdrand_supported = true; + } + GetCPUID(7, 0, eax, ebx, ecx, edx); + if (ebx & CPUID_F7_EBX_RDSEED) { + g_rdseed_supported = true; } } static void ReportHardwareRand() { - if (rdrand_supported) { - // This must be done in a separate function, as HWRandInit() may be - // indirectly called from global constructors, before logging is - // initialized. + // This must be done in a separate function, as HWRandInit() may be + // indirectly called from global constructors, before logging is + // initialized. + if (g_rdseed_supported) { + LogPrintf("Using RdSeed as additional entropy source\n"); + } + if (g_rdrand_supported) { LogPrintf("Using RdRand as an additional entropy source\n"); } } +/** + * Read 64 bits of entropy using rdrand. + * + * Must only be called when RdRand is supported. + */ +static uint64_t GetRdRand() noexcept { + // RdRand may very rarely fail. Invoke it up to 10 times in a loop to reduce + // this risk. +#ifdef __i386__ + uint8_t ok; + uint32_t r1, r2; + for (int i = 0; i < 10; ++i) { + // rdrand %eax + __asm__ volatile(".byte 0x0f, 0xc7, 0xf0; setc %1" + : "=a"(r1), "=q"(ok)::"cc"); + if (ok) { + break; + } + } + for (int i = 0; i < 10; ++i) { + // rdrand %eax + __asm__ volatile(".byte 0x0f, 0xc7, 0xf0; setc %1" + : "=a"(r2), "=q"(ok)::"cc"); + if (ok) { + break; + } + } + return (uint64_t(r2) << 32) | r1; +#elif defined(__x86_64__) || defined(__amd64__) + uint8_t ok; + uint64_t r1; + for (int i = 0; i < 10; ++i) { + // rdrand %rax + __asm__ volatile(".byte 0x48, 0x0f, 0xc7, 0xf0; setc %1" + : "=a"(r1), "=q"(ok)::"cc"); + if (ok) { + break; + } + } + return r1; +#else +#error "RdRand is only supported on x86 and x86_64" +#endif +} + +/** + * Read 64 bits of entropy using rdseed. + * + * Must only be called when RdSeed is supported. + */ +static uint64_t GetRdSeed() noexcept { + // RdSeed may fail when the HW RNG is overloaded. Loop indefinitely until + // enough entropy is gathered, but pause after every failure. +#ifdef __i386__ + uint8_t ok; + uint32_t r1, r2; + do { + // rdseed %eax + __asm__ volatile(".byte 0x0f, 0xc7, 0xf8; setc %1" + : "=a"(r1), "=q"(ok)::"cc"); + if (ok) { + break; + } + __asm__ volatile("pause"); + } while (true); + do { + // rdseed %eax + __asm__ volatile(".byte 0x0f, 0xc7, 0xf8; setc %1" + : "=a"(r2), "=q"(ok)::"cc"); + if (ok) { + break; + } + __asm__ volatile("pause"); + } while (true); + return (uint64_t(r2) << 32) | r1; +#elif defined(__x86_64__) || defined(__amd64__) + uint8_t ok; + uint64_t r1; + do { + // rdseed %rax + __asm__ volatile(".byte 0x48, 0x0f, 0xc7, 0xf8; setc %1" + : "=a"(r1), "=q"(ok)::"cc"); + if (ok) { + break; + } + __asm__ volatile("pause"); + } while (true); + return r1; +#else +#error "RdSeed is only supported on x86 and x86_64" +#endif +} + #else /** * Access to other hardware random number generators could be added here later, @@ -108,45 +230,48 @@ static void ReportHardwareRand() {} #endif -static bool GetHardwareRand(uint8_t *ent32) noexcept { +/** + * Add 64 bits of entropy gathered from hardware to hasher. Do nothing if not + * supported. + */ +static void SeedHardwareFast(CSHA512 &hasher) noexcept { #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__) - if (rdrand_supported) { - uint8_t ok; -// Not all assemblers support the rdrand instruction, write it in hex. -#ifdef __i386__ - for (int iter = 0; iter < 4; ++iter) { - uint32_t r1, r2; - __asm__ volatile(".byte 0x0f, 0xc7, 0xf0;" // rdrand %eax - ".byte 0x0f, 0xc7, 0xf2;" // rdrand %edx - "setc %2" - : "=a"(r1), "=d"(r2), "=q"(ok)::"cc"); - if (!ok) { - return false; - } - WriteLE32(ent32 + 8 * iter, r1); - WriteLE32(ent32 + 8 * iter + 4, r2); + if (g_rdrand_supported) { + uint64_t out = GetRdRand(); + hasher.Write((const uint8_t *)&out, sizeof(out)); + return; + } +#endif +} + +/** + * Add 256 bits of entropy gathered from hardware to hasher. Do nothing if not + * supported. + */ +static void SeedHardwareSlow(CSHA512 &hasher) noexcept { +#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__) + // When we want 256 bits of entropy, prefer RdSeed over RdRand, as it's + // guaranteed to produce independent randomness on every call. + if (g_rdseed_supported) { + for (int i = 0; i < 4; ++i) { + uint64_t out = GetRdSeed(); + hasher.Write((const uint8_t *)&out, sizeof(out)); } -#else - uint64_t r1, r2, r3, r4; - __asm__ volatile(".byte 0x48, 0x0f, 0xc7, 0xf0, " // rdrand %rax - "0x48, 0x0f, 0xc7, 0xf3, " // rdrand %rbx - "0x48, 0x0f, 0xc7, 0xf1, " // rdrand %rcx - "0x48, 0x0f, 0xc7, 0xf2; " // rdrand %rdx - "setc %4" - : "=a"(r1), "=b"(r2), "=c"(r3), "=d"(r4), - "=q"(ok)::"cc"); - if (!ok) { - return false; + return; + } + // When falling back to RdRand, XOR the result of 1024 results. + // This guarantees a reseeding occurs between each. + if (g_rdrand_supported) { + for (int i = 0; i < 4; ++i) { + uint64_t out = 0; + for (int j = 0; j < 1024; ++j) { + out ^= GetRdRand(); + } + hasher.Write((const uint8_t *)&out, sizeof(out)); } - WriteLE64(ent32, r1); - WriteLE64(ent32 + 8, r2); - WriteLE64(ent32 + 16, r3); - WriteLE64(ent32 + 24, r4); -#endif - return true; + return; } #endif - return false; } static void RandAddSeedPerfmon(CSHA512 &hasher) { @@ -426,10 +551,7 @@ hasher.Write((const uint8_t *)&ptr, sizeof(ptr)); // Hardware randomness is very fast when available; use it always. - bool have_hw_rand = GetHardwareRand(buffer); - if (have_hw_rand) { - hasher.Write(buffer, sizeof(buffer)); - } + SeedHardwareFast(hasher); // High-precision timestamp SeedTimestamp(hasher); @@ -480,6 +602,9 @@ RAND_screen(); #endif + // Gather 256 bits of hardware randomness, if available + SeedHardwareSlow(hasher); + // Everything that the 'slow' seeder includes. SeedSlow(hasher);