diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index a04cf5f00..abb27e8bd 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -1,116 +1,122 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "perf.h" #include #include -#include benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { static std::map benchmarks_map; return benchmarks_map; } -static double gettimedouble(void) { - struct timeval tv; - gettimeofday(&tv, nullptr); - return tv.tv_usec * 0.000001 + tv.tv_sec; -} - benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func) { benchmarks().insert(std::make_pair(name, func)); } -void benchmark::BenchRunner::RunAll(double elapsedTimeForOne) { +void benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) { perf_init(); + if (std::ratio_less_equal::value) { + std::cerr << "WARNING: Clock precision is worse than microsecond - " + "benchmarks may be less accurate!\n"; + } std::cout << "#Benchmark" << "," << "count" << "," - << "min" + << "min(ns)" << "," - << "max" + << "max(ns)" << "," - << "average" + << "average(ns)" << "," << "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n"; for (const auto &p : benchmarks()) { State state(p.first, elapsedTimeForOne); p.second(state); } perf_fini(); } bool benchmark::State::KeepRunning() { if (count & countMask) { ++count; return true; } - double now; + time_point now; + uint64_t nowCycles; if (count == 0) { - lastTime = beginTime = now = gettimedouble(); + lastTime = beginTime = now = clock::now(); lastCycles = beginCycles = nowCycles = perf_cpucycles(); } else { - now = gettimedouble(); - double elapsed = now - lastTime; - double elapsedOne = elapsed * countMaskInv; + now = clock::now(); + auto elapsed = now - lastTime; + auto elapsedOne = elapsed / (countMask + 1); if (elapsedOne < minTime) minTime = elapsedOne; if (elapsedOne > maxTime) maxTime = elapsedOne; // We only use relative values, so don't have to handle 64-bit // wrap-around specially nowCycles = perf_cpucycles(); - uint64_t elapsedOneCycles = (nowCycles - lastCycles) * countMaskInv; + uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; if (elapsed * 128 < maxElapsed) { // If the execution was much too fast (1/128th of maxElapsed), // increase the count mask by 8x and restart timing. // The restart avoids including the overhead of this code in the // measurement. countMask = ((countMask << 3) | 7) & ((1LL << 60) - 1); - countMaskInv = 1. / (countMask + 1); count = 0; - minTime = std::numeric_limits::max(); - maxTime = std::numeric_limits::min(); + minTime = duration::max(); + maxTime = duration::zero(); minCycles = std::numeric_limits::max(); maxCycles = std::numeric_limits::min(); return true; } if (elapsed * 16 < maxElapsed) { uint64_t newCountMask = ((countMask << 1) | 1) & ((1LL << 60) - 1); if ((count & newCountMask) == 0) { countMask = newCountMask; - countMaskInv = 1. / (countMask + 1); } } } lastTime = now; lastCycles = nowCycles; ++count; if (now - beginTime < maxElapsed) return true; // Keep going --count; // Output results - double average = (now - beginTime) / count; + // Duration casts are only necessary here because hardware with + // sub-nanosecond clocks + // will lose precision. + int64_t min_elapsed = + std::chrono::duration_cast(minTime).count(); + int64_t max_elapsed = + std::chrono::duration_cast(maxTime).count(); + int64_t avg_elapsed = std::chrono::duration_cast( + (now - beginTime) / count) + .count(); int64_t averageCycles = (nowCycles - beginCycles) / count; std::cout << std::fixed << std::setprecision(15) << name << "," << count - << "," << minTime << "," << maxTime << "," << average << "," - << minCycles << "," << maxCycles << "," << averageCycles << "\n"; + << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed + << "," << minCycles << "," << maxCycles << "," << averageCycles + << "\n"; return false; } diff --git a/src/bench/bench.h b/src/bench/bench.h index ae0b19a5b..3dfb49e8f 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -1,83 +1,94 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #ifndef BITCOIN_BENCH_BENCH_H #define BITCOIN_BENCH_BENCH_H +#include #include #include #include #include #include #include // Simple micro-benchmarking framework; API mostly matches a subset of the // Google Benchmark framework (see https://github.com/google/benchmark). Wny not // use the Google Benchmark framework? Because adding Yet Another Dependency // (that uses cmake as its build system and has lots of features we don't need) // isn't worth it. /* * Usage: static void CODE_TO_TIME(benchmark::State& state) { ... do any setup needed... while (state.KeepRunning()) { ... do stuff you want to time... } ... do any cleanup needed... } BENCHMARK(CODE_TO_TIME); */ namespace benchmark { +// In case high_resolution_clock is steady, prefer that, otherwise use +// steady_clock. +struct best_clock { + using hi_res_clock = std::chrono::high_resolution_clock; + using steady_clock = std::chrono::steady_clock; + using type = std::conditional::type; +}; +using clock = best_clock::type; +using time_point = clock::time_point; +using duration = clock::duration; class State { std::string name; - double maxElapsed; - double beginTime; - double lastTime, minTime, maxTime, countMaskInv; + duration maxElapsed; + time_point beginTime, lastTime; + duration minTime, maxTime; uint64_t count; uint64_t countMask; uint64_t beginCycles; uint64_t lastCycles; uint64_t minCycles; uint64_t maxCycles; public: - State(std::string _name, double _maxElapsed) + State(std::string _name, duration _maxElapsed) : name(_name), maxElapsed(_maxElapsed), count(0) { - minTime = std::numeric_limits::max(); - maxTime = std::numeric_limits::min(); + minTime = duration::max(); + maxTime = duration::zero(); minCycles = std::numeric_limits::max(); maxCycles = std::numeric_limits::min(); countMask = 1; - countMaskInv = 1. / (countMask + 1); } bool KeepRunning(); }; typedef std::function BenchFunction; class BenchRunner { typedef std::map BenchmarkMap; static BenchmarkMap &benchmarks(); public: BenchRunner(std::string name, BenchFunction func); - static void RunAll(double elapsedTimeForOne = 1.0); + static void RunAll(duration elapsedTimeForOne = std::chrono::seconds(1)); }; -} // namespace benchmark +} // BENCHMARK(foo) expands to: benchmark::BenchRunner bench_11foo("foo", foo); #define BENCHMARK(n) \ benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))( \ BOOST_PP_STRINGIZE(n), n); #endif // BITCOIN_BENCH_BENCH_H diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp index 0a5a22d87..b269038ef 100644 --- a/src/bench/rollingbloom.cpp +++ b/src/bench/rollingbloom.cpp @@ -1,44 +1,44 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include #include "bench.h" #include "bloom.h" -#include "utiltime.h" static void RollingBloom(benchmark::State &state) { CRollingBloomFilter filter(120000, 0.000001); std::vector data(32); uint32_t count = 0; uint32_t nEntriesPerGeneration = (120000 + 1) / 2; uint32_t countnow = 0; uint64_t match = 0; while (state.KeepRunning()) { count++; data[0] = count; data[1] = count >> 8; data[2] = count >> 16; data[3] = count >> 24; if (countnow == nEntriesPerGeneration) { - int64_t b = GetTimeMicros(); + auto b = benchmark::clock::now(); filter.insert(data); - int64_t e = GetTimeMicros(); - std::cout << "RollingBloom-refresh,1," << (e - b) * 0.000001 << "," - << (e - b) * 0.000001 << "," << (e - b) * 0.000001 - << "\n"; + auto total = std::chrono::duration_cast( + benchmark::clock::now() - b) + .count(); + std::cout << "RollingBloom-refresh,1," << total << "," << total + << "," << total << "\n"; countnow = 0; } else { filter.insert(data); } countnow++; data[0] = count >> 24; data[1] = count >> 16; data[2] = count >> 8; data[3] = count; match += filter.contains(data); } } BENCHMARK(RollingBloom);