diff --git a/src/bench/Examples.cpp b/src/bench/Examples.cpp index 07b567f77..a86989724 100644 --- a/src/bench/Examples.cpp +++ b/src/bench/Examples.cpp @@ -1,32 +1,32 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "utiltime.h" #include "validation.h" // Sanity test: this should loop ten times, and // min/max/average should be close to 100ms. static void Sleep100ms(benchmark::State &state) { while (state.KeepRunning()) { MilliSleep(100); } } -BENCHMARK(Sleep100ms); +BENCHMARK(Sleep100ms, 10); // Extremely fast-running benchmark: #include volatile double sum = 0.0; // volatile, global so not optimized away static void Trig(benchmark::State &state) { double d = 0.01; while (state.KeepRunning()) { sum += sin(d); d += 0.000001; } } -BENCHMARK(Trig); +BENCHMARK(Trig, 12 * 1000 * 1000); diff --git a/src/bench/base58.cpp b/src/bench/base58.cpp index 7b64b8b6b..55dbabf54 100644 --- a/src/bench/base58.cpp +++ b/src/bench/base58.cpp @@ -1,44 +1,44 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "bench.h" #include "base58.h" #include "validation.h" #include #include #include static void Base58Encode(benchmark::State &state) { static const std::vector buffer = { 17, 79, 8, 99, 150, 189, 208, 162, 22, 23, 203, 163, 36, 58, 147, 227, 139, 2, 215, 100, 91, 38, 11, 141, 253, 40, 117, 21, 16, 90, 200, 24}; while (state.KeepRunning()) { EncodeBase58(buffer); } } static void Base58CheckEncode(benchmark::State &state) { static const std::vector buffer = { 17, 79, 8, 99, 150, 189, 208, 162, 22, 23, 203, 163, 36, 58, 147, 227, 139, 2, 215, 100, 91, 38, 11, 141, 253, 40, 117, 21, 16, 90, 200, 24}; while (state.KeepRunning()) { EncodeBase58Check(buffer); } } static void Base58Decode(benchmark::State &state) { const char *addr = "17VZNX1SN5NtKa8UQFxwQbFeFc3iqRYhem"; std::vector vch; while (state.KeepRunning()) { DecodeBase58(addr, vch); } } -BENCHMARK(Base58Encode); -BENCHMARK(Base58CheckEncode); -BENCHMARK(Base58Decode); +BENCHMARK(Base58Encode, 470 * 1000); +BENCHMARK(Base58CheckEncode, 320 * 1000); +BENCHMARK(Base58Decode, 800 * 1000); diff --git a/src/bench/bench.cpp b/src/bench/bench.cpp index abb27e8bd..f75360e16 100644 --- a/src/bench/bench.cpp +++ b/src/bench/bench.cpp @@ -1,122 +1,144 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "bench.h" #include "perf.h" +#include +#include #include #include +#include +#include + +void benchmark::ConsolePrinter::header() { + std::cout << "# Benchmark, evals, iterations, total, min, max, median" + << std::endl; +} + +void benchmark::ConsolePrinter::result(const State &state) { + auto results = state.m_elapsed_results; + std::sort(results.begin(), results.end()); + + double total = state.m_num_iters * + std::accumulate(results.begin(), results.end(), 0.0); + + double front = 0; + double back = 0; + double median = 0; + + if (!results.empty()) { + front = results.front(); + back = results.back(); + + size_t mid = results.size() / 2; + median = results[mid]; + if (0 == results.size() % 2) { + median = (results[mid - 1] + results[mid]) / 2; + } + } + + std::cout << std::setprecision(6); + std::cout << state.m_name << ", " << state.m_num_evals << ", " + << state.m_num_iters << ", " << total << ", " << front << ", " + << back << ", " << median << std::endl; +} + +void benchmark::ConsolePrinter::footer() {} +benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, + int64_t height) + : m_plotly_url(plotly_url), m_width(width), m_height(height) {} + +void benchmark::PlotlyPrinter::header() { + std::cout << "" + << "" + << "

" + << ""; +} benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() { - static std::map benchmarks_map; + static std::map benchmarks_map; return benchmarks_map; } benchmark::BenchRunner::BenchRunner(std::string name, - benchmark::BenchFunction func) { - benchmarks().insert(std::make_pair(name, func)); + benchmark::BenchFunction func, + uint64_t num_iters_for_one_second) { + benchmarks().insert( + std::make_pair(name, Bench{func, num_iters_for_one_second})); } -void benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne) { +void benchmark::BenchRunner::RunAll(Printer &printer, uint64_t num_evals, + double scaling, const std::string &filter, + bool is_list_only) { perf_init(); - if (std::ratio_less_equal::value) { + if (!std::ratio_less_equal::value) { std::cerr << "WARNING: Clock precision is worse than microsecond - " "benchmarks may be less accurate!\n"; } - std::cout << "#Benchmark" - << "," - << "count" - << "," - << "min(ns)" - << "," - << "max(ns)" - << "," - << "average(ns)" - << "," - << "min_cycles" - << "," - << "max_cycles" - << "," - << "average_cycles" - << "\n"; + + std::regex reFilter(filter); + std::smatch baseMatch; + + printer.header(); for (const auto &p : benchmarks()) { - State state(p.first, elapsedTimeForOne); - p.second(state); + if (!std::regex_match(p.first, baseMatch, reFilter)) { + continue; + } + + uint64_t num_iters = + static_cast(p.second.num_iters_for_one_second * scaling); + if (0 == num_iters) { + num_iters = 1; + } + State state(p.first, num_evals, num_iters, printer); + if (!is_list_only) { + p.second.func(state); + } + printer.result(state); } + + printer.footer(); + perf_fini(); } -bool benchmark::State::KeepRunning() { - if (count & countMask) { - ++count; - return true; - } - time_point now; - - uint64_t nowCycles; - if (count == 0) { - lastTime = beginTime = now = clock::now(); - lastCycles = beginCycles = nowCycles = perf_cpucycles(); - } else { - now = clock::now(); - auto elapsed = now - 
lastTime; - auto elapsedOne = elapsed / (countMask + 1); - if (elapsedOne < minTime) minTime = elapsedOne; - if (elapsedOne > maxTime) maxTime = elapsedOne; - - // We only use relative values, so don't have to handle 64-bit - // wrap-around specially - nowCycles = perf_cpucycles(); - uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1); - if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles; - if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles; - - if (elapsed * 128 < maxElapsed) { - // If the execution was much too fast (1/128th of maxElapsed), - // increase the count mask by 8x and restart timing. - // The restart avoids including the overhead of this code in the - // measurement. - countMask = ((countMask << 3) | 7) & ((1LL << 60) - 1); - count = 0; - minTime = duration::max(); - maxTime = duration::zero(); - minCycles = std::numeric_limits::max(); - maxCycles = std::numeric_limits::min(); - return true; - } - if (elapsed * 16 < maxElapsed) { - uint64_t newCountMask = ((countMask << 1) | 1) & ((1LL << 60) - 1); - if ((count & newCountMask) == 0) { - countMask = newCountMask; - } +bool benchmark::State::UpdateTimer(const benchmark::time_point current_time) { + if (m_start_time != time_point()) { + std::chrono::duration diff = current_time - m_start_time; + m_elapsed_results.push_back(diff.count() / m_num_iters); + + if (m_elapsed_results.size() == m_num_evals) { + return false; } } - lastTime = now; - lastCycles = nowCycles; - ++count; - - if (now - beginTime < maxElapsed) return true; // Keep going - - --count; - - // Output results - // Duration casts are only necessary here because hardware with - // sub-nanosecond clocks - // will lose precision. 
- int64_t min_elapsed = - std::chrono::duration_cast(minTime).count(); - int64_t max_elapsed = - std::chrono::duration_cast(maxTime).count(); - int64_t avg_elapsed = std::chrono::duration_cast( - (now - beginTime) / count) - .count(); - int64_t averageCycles = (nowCycles - beginCycles) / count; - std::cout << std::fixed << std::setprecision(15) << name << "," << count - << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed - << "," << minCycles << "," << maxCycles << "," << averageCycles - << "\n"; - - return false; + + m_num_iters_left = m_num_iters - 1; + return true; } diff --git a/src/bench/bench.h b/src/bench/bench.h index 3dfb49e8f..dc289d5ea 100644 --- a/src/bench/bench.h +++ b/src/bench/bench.h @@ -1,94 +1,144 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #ifndef BITCOIN_BENCH_BENCH_H #define BITCOIN_BENCH_BENCH_H #include #include #include #include #include +#include #include #include // Simple micro-benchmarking framework; API mostly matches a subset of the -// Google Benchmark framework (see https://github.com/google/benchmark). Wny not -// use the Google Benchmark framework? Because adding Yet Another Dependency -// (that uses cmake as its build system and has lots of features we don't need) -// isn't worth it. +// Google Benchmark framework (see https://github.com/google/benchmark) +// Why not use the Google Benchmark framework? Because adding Yet Another +// Dependency (that uses cmake as its build system and has lots of features we +// don't need) isn't worth it. /* * Usage: static void CODE_TO_TIME(benchmark::State& state) { ... do any setup needed... while (state.KeepRunning()) { ... do stuff you want to time... } ... do any cleanup needed... 
} -BENCHMARK(CODE_TO_TIME); +// default to running benchmark for 5000 iterations +BENCHMARK(CODE_TO_TIME, 5000); */ namespace benchmark { // In case high_resolution_clock is steady, prefer that, otherwise use // steady_clock. struct best_clock { using hi_res_clock = std::chrono::high_resolution_clock; using steady_clock = std::chrono::steady_clock; using type = std::conditional::type; }; using clock = best_clock::type; using time_point = clock::time_point; using duration = clock::duration; -class State { - std::string name; - duration maxElapsed; - time_point beginTime, lastTime; - duration minTime, maxTime; - uint64_t count; - uint64_t countMask; - uint64_t beginCycles; - uint64_t lastCycles; - uint64_t minCycles; - uint64_t maxCycles; +class Printer; +class State { public: - State(std::string _name, duration _maxElapsed) - : name(_name), maxElapsed(_maxElapsed), count(0) { - minTime = duration::max(); - maxTime = duration::zero(); - minCycles = std::numeric_limits::max(); - maxCycles = std::numeric_limits::min(); - countMask = 1; + std::string m_name; + uint64_t m_num_iters_left; + const uint64_t m_num_iters; + const uint64_t m_num_evals; + std::vector m_elapsed_results; + time_point m_start_time; + + bool UpdateTimer(time_point finish_time); + + State(std::string name, uint64_t num_evals, double num_iters, + Printer &printer) + : m_name(name), m_num_iters_left(0), m_num_iters(num_iters), + m_num_evals(num_evals) {} + + inline bool KeepRunning() { + if (m_num_iters_left--) { + return true; + } + + bool result = UpdateTimer(clock::now()); + // measure again so runtime of UpdateTimer is not included + m_start_time = clock::now(); + return result; } - bool KeepRunning(); }; typedef std::function BenchFunction; class BenchRunner { - typedef std::map BenchmarkMap; + struct Bench { + BenchFunction func; + uint64_t num_iters_for_one_second; + }; + typedef std::map BenchmarkMap; static BenchmarkMap &benchmarks(); public: - BenchRunner(std::string name, BenchFunction 
func); + BenchRunner(std::string name, BenchFunction func, + uint64_t num_iters_for_one_second); + + static void RunAll(Printer &printer, uint64_t num_evals, double scaling, + const std::string &filter, bool is_list_only); +}; - static void RunAll(duration elapsedTimeForOne = std::chrono::seconds(1)); +// interface to output benchmark results. +class Printer { +public: + virtual ~Printer() {} + virtual void header() = 0; + virtual void result(const State &state) = 0; + virtual void footer() = 0; +}; + +// default printer to console, shows min, max, median. +class ConsolePrinter : public Printer { +public: + void header(); + void result(const State &state); + void footer(); +}; + +// creates box plot with plotly.js +class PlotlyPrinter : public Printer { +public: + PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height); + void header(); + void result(const State &state); + void footer(); + +private: + std::string m_plotly_url; + int64_t m_width; + int64_t m_height; }; } -// BENCHMARK(foo) expands to: benchmark::BenchRunner bench_11foo("foo", foo); -#define BENCHMARK(n) \ +// BENCHMARK(foo, num_iters_for_one_second) expands to: benchmark::BenchRunner +// bench_11foo("foo", num_iterations); +// Choose a num_iters_for_one_second that takes roughly 1 second. The goal is +// that all benchmarks should take approximately +// the same time, and scaling factor can be used that the total time is +// appropriate for your system. 
+#define BENCHMARK(n, num_iters_for_one_second) \ benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))( \ - BOOST_PP_STRINGIZE(n), n); + BOOST_PP_STRINGIZE(n), n, (num_iters_for_one_second)); #endif // BITCOIN_BENCH_BENCH_H diff --git a/src/bench/bench_bitcoin.cpp b/src/bench/bench_bitcoin.cpp index 59a5a4558..87d847480 100644 --- a/src/bench/bench_bitcoin.cpp +++ b/src/bench/bench_bitcoin.cpp @@ -1,25 +1,98 @@ // Copyright (c) 2015-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "crypto/sha256.h" #include "key.h" #include "random.h" #include "util.h" #include "validation.h" +#include + +#include + +static const int64_t DEFAULT_BENCH_EVALUATIONS = 5; +static const char *DEFAULT_BENCH_FILTER = ".*"; +static const char *DEFAULT_BENCH_SCALING = "1.0"; +static const char *DEFAULT_BENCH_PRINTER = "console"; +static const char *DEFAULT_PLOT_PLOTLYURL = + "https://cdn.plot.ly/plotly-latest.min.js"; +static const int64_t DEFAULT_PLOT_WIDTH = 1024; +static const int64_t DEFAULT_PLOT_HEIGHT = 768; + int main(int argc, char **argv) { + gArgs.ParseParameters(argc, argv); + + if (gArgs.IsArgSet("-?") || gArgs.IsArgSet("-h") || + gArgs.IsArgSet("-help")) { + std::cout + << HelpMessageGroup(_("Options:")) + << HelpMessageOpt("-?", _("Print this help message and exit")) + << HelpMessageOpt("-list", + _("List benchmarks without executing them. Can " + "be combined with -scaling and -filter")) + << HelpMessageOpt("-evals=", + strprintf(_("Number of measurement evaluations " + "to perform. 
(default: %u)"), + DEFAULT_BENCH_EVALUATIONS)) + << HelpMessageOpt("-filter=", + strprintf(_("Regular expression filter to select " + "benchmark by name (default: %s)"), + DEFAULT_BENCH_FILTER)) + << HelpMessageOpt("-scaling=", + strprintf(_("Scaling factor for benchmark's " + "runtime (default: %u)"), + DEFAULT_BENCH_SCALING)) + << HelpMessageOpt( + "-printer=(console|plot)", + strprintf(_("Choose printer format. console: print data to " + "console. plot: Print results as HTML graph " + "(default: %s)"), + DEFAULT_BENCH_PRINTER)) + << HelpMessageOpt( + "-plot-plotlyurl=", + strprintf(_("URL to use for plotly.js (default: %s)"), + DEFAULT_PLOT_PLOTLYURL)) + << HelpMessageOpt("-plot-width=", + strprintf(_("Plot width in pixel (default: %u)"), + DEFAULT_PLOT_WIDTH)) + << HelpMessageOpt("-plot-height=", + strprintf(_("Plot height in pixel (default: %u)"), + DEFAULT_PLOT_HEIGHT)); + + return 0; + } + SHA256AutoDetect(); RandomInit(); ECC_Start(); SetupEnvironment(); // don't want to write to debug.log file GetLogger().fPrintToDebugLog = false; - benchmark::BenchRunner::RunAll(); + int64_t evaluations = gArgs.GetArg("-evals", DEFAULT_BENCH_EVALUATIONS); + std::string regex_filter = gArgs.GetArg("-filter", DEFAULT_BENCH_FILTER); + std::string scaling_str = gArgs.GetArg("-scaling", DEFAULT_BENCH_SCALING); + bool is_list_only = gArgs.GetBoolArg("-list", false); + + double scaling_factor = boost::lexical_cast(scaling_str); + + std::unique_ptr printer( + new benchmark::ConsolePrinter()); + std::string printer_arg = gArgs.GetArg("-printer", DEFAULT_BENCH_PRINTER); + if ("plot" == printer_arg) { + printer.reset(new benchmark::PlotlyPrinter( + gArgs.GetArg("-plot-plotlyurl", DEFAULT_PLOT_PLOTLYURL), + gArgs.GetArg("-plot-width", DEFAULT_PLOT_WIDTH), + gArgs.GetArg("-plot-height", DEFAULT_PLOT_HEIGHT))); + } + + benchmark::BenchRunner::RunAll(*printer, evaluations, scaling_factor, + regex_filter, is_list_only); ECC_Stop(); } diff --git a/src/bench/cashaddr.cpp 
b/src/bench/cashaddr.cpp index c258a19f4..f38bf6874 100644 --- a/src/bench/cashaddr.cpp +++ b/src/bench/cashaddr.cpp @@ -1,32 +1,32 @@ // Copyright (c) 2018 The Bitcoin developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "cashaddr.h" #include "bench.h" #include #include static void CashAddrEncode(benchmark::State &state) { std::vector buffer = {17, 79, 8, 99, 150, 189, 208, 162, 22, 23, 203, 163, 36, 58, 147, 227, 139, 2, 215, 100, 91, 38, 11, 141, 253, 40, 117, 21, 16, 90, 200, 24}; while (state.KeepRunning()) { cashaddr::Encode("bitcoincash", buffer); } } static void CashAddrDecode(benchmark::State &state) { const char *addrWithPrefix = "bitcoincash:qprnwmr02d7ky9m693qufj5mgkpf4wvssv0w86tkjd"; const char *addrNoPrefix = "qprnwmr02d7ky9m693qufj5mgkpf4wvssv0w86tkjd"; while (state.KeepRunning()) { cashaddr::Decode(addrWithPrefix, "bitcoincash"); cashaddr::Decode(addrNoPrefix, "bitcoincash"); } } -BENCHMARK(CashAddrEncode); -BENCHMARK(CashAddrDecode); +BENCHMARK(CashAddrEncode, 800 * 1000); +BENCHMARK(CashAddrDecode, 800 * 1000); diff --git a/src/bench/ccoins_caching.cpp b/src/bench/ccoins_caching.cpp index 75791407a..0f28fac4a 100644 --- a/src/bench/ccoins_caching.cpp +++ b/src/bench/ccoins_caching.cpp @@ -1,90 +1,90 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "coins.h" #include "policy/policy.h" #include "wallet/crypter.h" #include // FIXME: Dedup with SetupDummyInputs in test/transaction_tests.cpp. // // Helper: create two dummy transactions, each with // two outputs. The first has 11 and 50 CENT outputs // paid to a TX_PUBKEY, the second 21 and 22 CENT outputs // paid to a TX_PUBKEYHASH. 
// static std::vector SetupDummyInputs(CBasicKeyStore &keystoreRet, CCoinsViewCache &coinsRet) { std::vector dummyTransactions; dummyTransactions.resize(2); // Add some keys to the keystore: CKey key[4]; for (int i = 0; i < 4; i++) { key[i].MakeNewKey(i % 2); keystoreRet.AddKey(key[i]); } // Create some dummy input transactions dummyTransactions[0].vout.resize(2); dummyTransactions[0].vout[0].nValue = 11 * CENT; dummyTransactions[0].vout[0].scriptPubKey << ToByteVector(key[0].GetPubKey()) << OP_CHECKSIG; dummyTransactions[0].vout[1].nValue = 50 * CENT; dummyTransactions[0].vout[1].scriptPubKey << ToByteVector(key[1].GetPubKey()) << OP_CHECKSIG; AddCoins(coinsRet, CTransaction(dummyTransactions[0]), 0); dummyTransactions[1].vout.resize(2); dummyTransactions[1].vout[0].nValue = 21 * CENT; dummyTransactions[1].vout[0].scriptPubKey = GetScriptForDestination(key[2].GetPubKey().GetID()); dummyTransactions[1].vout[1].nValue = 22 * CENT; dummyTransactions[1].vout[1].scriptPubKey = GetScriptForDestination(key[3].GetPubKey().GetID()); AddCoins(coinsRet, CTransaction(dummyTransactions[1]), 0); return dummyTransactions; } // Microbenchmark for simple accesses to a CCoinsViewCache database. Note from // laanwj, "replicating the actual usage patterns of the client is hard though, // many times micro-benchmarks of the database showed completely different // characteristics than e.g. reindex timings. But that's not a requirement of // every benchmark." 
// (https://github.com/bitcoin/bitcoin/issues/7883#issuecomment-224807484) static void CCoinsCaching(benchmark::State &state) { CBasicKeyStore keystore; CCoinsView coinsDummy; CCoinsViewCache coins(&coinsDummy); std::vector dummyTransactions = SetupDummyInputs(keystore, coins); CMutableTransaction t1; t1.vin.resize(3); t1.vin[0].prevout = COutPoint(dummyTransactions[0].GetId(), 1); t1.vin[0].scriptSig << std::vector(65, 0); t1.vin[1].prevout = COutPoint(dummyTransactions[1].GetId(), 0); t1.vin[1].scriptSig << std::vector(65, 0) << std::vector(33, 4); t1.vin[2].prevout = COutPoint(dummyTransactions[1].GetId(), 1); t1.vin[2].scriptSig << std::vector(65, 0) << std::vector(33, 4); t1.vout.resize(2); t1.vout[0].nValue = 90 * CENT; t1.vout[0].scriptPubKey << OP_1; // Benchmark. while (state.KeepRunning()) { CTransaction t(t1); bool success = AreInputsStandard(t, coins); assert(success); Amount value = coins.GetValueIn(t); assert(value == (50 + 21 + 22) * CENT); } } -BENCHMARK(CCoinsCaching); +BENCHMARK(CCoinsCaching, 170 * 1000); diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index a2c61f442..a5303d4fd 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -1,57 +1,57 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "config.h" #include "consensus/validation.h" #include "streams.h" #include "validation.h" namespace block_bench { #include "bench/data/block413567.raw.h" } // These are the two major time-sinks which happen after we have fully received // a block off the wire, but before we can relay the block on to peers using // compact block relay. 
static void DeserializeBlockTest(benchmark::State &state) { CDataStream stream((const char *)block_bench::block413567, (const char *)&block_bench::block413567[sizeof( block_bench::block413567)], SER_NETWORK, PROTOCOL_VERSION); char a; stream.write(&a, 1); // Prevent compaction while (state.KeepRunning()) { CBlock block; stream >> block; assert(stream.Rewind(sizeof(block_bench::block413567))); } } static void DeserializeAndCheckBlockTest(benchmark::State &state) { CDataStream stream((const char *)block_bench::block413567, (const char *)&block_bench::block413567[sizeof( block_bench::block413567)], SER_NETWORK, PROTOCOL_VERSION); char a; stream.write(&a, 1); // Prevent compaction const Config &config = GetConfig(); while (state.KeepRunning()) { // Note that CBlock caches its checked state, so we need to recreate it // here. CBlock block; stream >> block; assert(stream.Rewind(sizeof(block_bench::block413567))); CValidationState validationState; assert(CheckBlock(config, block, validationState)); } } -BENCHMARK(DeserializeBlockTest); -BENCHMARK(DeserializeAndCheckBlockTest); +BENCHMARK(DeserializeBlockTest, 130); +BENCHMARK(DeserializeAndCheckBlockTest, 160); diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp index 608e23227..4533c622e 100644 --- a/src/bench/checkqueue.cpp +++ b/src/bench/checkqueue.cpp @@ -1,94 +1,58 @@ // Copyright (c) 2015 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "checkqueue.h" #include "bench.h" #include "prevector.h" #include "random.h" #include "util.h" #include "validation.h" #include #include -// This Benchmark tests the CheckQueue with the lightest weight Checks, so it -// should make any lock contention particularly visible static const int MIN_CORES = 2; static const size_t BATCHES = 101; static const size_t BATCH_SIZE = 30; static const int PREVECTOR_SIZE = 28; -static const int QUEUE_BATCH_SIZE = 128; -static void CCheckQueueSpeed(benchmark::State &state) { - struct FakeJobNoWork { - bool operator()() { return true; } - void swap(FakeJobNoWork &x){}; - }; - CCheckQueue queue{QUEUE_BATCH_SIZE}; - boost::thread_group tg; - for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { - tg.create_thread([&] { queue.Thread(); }); - } - while (state.KeepRunning()) { - CCheckQueueControl control(&queue); - - // We call Add a number of times to simulate the behavior of adding a - // block of transactions at once. - std::vector> vBatches(BATCHES); - for (auto &vChecks : vBatches) { - vChecks.resize(BATCH_SIZE); - } - - for (auto &vChecks : vBatches) { - // We can't make vChecks in the inner loop because we want to - // measure the cost of getting the memory to each thread and we - // might get the same memory - control.Add(vChecks); - } - // control waits for completion by RAII, but it is done explicitly here - // for clarity - control.Wait(); - } - tg.interrupt_all(); - tg.join_all(); -} +static const size_t QUEUE_BATCH_SIZE = 128; // This Benchmark tests the CheckQueue with a slightly realistic workload, where // checks all contain a prevector that is indirect 50% of the time and there is // a little bit of work done between calls to Add. 
static void CCheckQueueSpeedPrevectorJob(benchmark::State &state) { struct PrevectorJob { prevector p; PrevectorJob() {} PrevectorJob(FastRandomContext &insecure_rand) { p.resize(insecure_rand.randrange(PREVECTOR_SIZE * 2)); } bool operator()() { return true; } void swap(PrevectorJob &x) { p.swap(x.p); }; }; CCheckQueue queue{QUEUE_BATCH_SIZE}; boost::thread_group tg; for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { tg.create_thread([&] { queue.Thread(); }); } while (state.KeepRunning()) { // Make insecure_rand here so that each iteration is identical. FastRandomContext insecure_rand(true); CCheckQueueControl control(&queue); std::vector> vBatches(BATCHES); for (auto &vChecks : vBatches) { vChecks.reserve(BATCH_SIZE); for (size_t x = 0; x < BATCH_SIZE; ++x) vChecks.emplace_back(insecure_rand); control.Add(vChecks); } // control waits for completion by RAII, but it is done explicitly here // for clarity control.Wait(); } tg.interrupt_all(); tg.join_all(); } -BENCHMARK(CCheckQueueSpeed); -BENCHMARK(CCheckQueueSpeedPrevectorJob); +BENCHMARK(CCheckQueueSpeedPrevectorJob, 1400); diff --git a/src/bench/coin_selection.cpp b/src/bench/coin_selection.cpp index 923b54921..064b31944 100644 --- a/src/bench/coin_selection.cpp +++ b/src/bench/coin_selection.cpp @@ -1,65 +1,65 @@ // Copyright (c) 2012-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "bench.h" #include "chainparams.h" #include "wallet/wallet.h" #include static void addCoin(const Amount nValue, const CWallet &wallet, std::vector &vCoins) { int nInput = 0; static int nextLockTime = 0; CMutableTransaction tx; // so all transactions get different hashes tx.nLockTime = nextLockTime++; tx.vout.resize(nInput + 1); tx.vout[nInput].nValue = nValue; CWalletTx *wtx = new CWalletTx(&wallet, MakeTransactionRef(std::move(tx))); int nAge = 6 * 24; COutput output(wtx, nInput, nAge, true /* spendable */, true /* solvable */, true /* safe */); vCoins.push_back(output); } // Simple benchmark for wallet coin selection. Note that it maybe be necessary // to build up more complicated scenarios in order to get meaningful // measurements of performance. From laanwj, "Wallet coin selection is probably // the hardest, as you need a wider selection of scenarios, just testing the // same one over and over isn't too useful. Generating random isn't useful // either for measurements." // (https://github.com/bitcoin/bitcoin/issues/7883#issuecomment-224807484) static void CoinSelection(benchmark::State &state) { SelectParams(CBaseChainParams::REGTEST); const CWallet wallet(Params()); std::vector vCoins; LOCK(wallet.cs_wallet); while (state.KeepRunning()) { // Empty wallet. for (COutput output : vCoins) { delete output.tx; } vCoins.clear(); // Add coins. 
for (int i = 0; i < 1000; i++) addCoin(1000 * COIN, wallet, vCoins); addCoin(3 * COIN, wallet, vCoins); std::set> setCoinsRet; Amount nValueRet; bool success = wallet.SelectCoinsMinConf(1003 * COIN, 1, 6, 0, vCoins, setCoinsRet, nValueRet); assert(success); assert(nValueRet == 1003 * COIN); assert(setCoinsRet.size() == 2); } } -BENCHMARK(CoinSelection); +BENCHMARK(CoinSelection, 650); diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp index fdefe09e0..a3213c70a 100644 --- a/src/bench/crypto_hash.cpp +++ b/src/bench/crypto_hash.cpp @@ -1,95 +1,88 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include #include "bench.h" #include "bloom.h" #include "crypto/ripemd160.h" #include "crypto/sha1.h" #include "crypto/sha256.h" #include "crypto/sha512.h" #include "hash.h" #include "random.h" #include "uint256.h" #include "utiltime.h" /* Number of bytes to hash per iteration */ static const uint64_t BUFFER_SIZE = 1000 * 1000; static void RIPEMD160(benchmark::State &state) { uint8_t hash[CRIPEMD160::OUTPUT_SIZE]; std::vector in(BUFFER_SIZE, 0); while (state.KeepRunning()) CRIPEMD160().Write(in.data(), in.size()).Finalize(hash); } static void SHA1(benchmark::State &state) { uint8_t hash[CSHA1::OUTPUT_SIZE]; std::vector in(BUFFER_SIZE, 0); while (state.KeepRunning()) CSHA1().Write(in.data(), in.size()).Finalize(hash); } static void SHA256(benchmark::State &state) { uint8_t hash[CSHA256::OUTPUT_SIZE]; std::vector in(BUFFER_SIZE, 0); while (state.KeepRunning()) CSHA256().Write(in.data(), in.size()).Finalize(hash); } static void SHA256_32b(benchmark::State &state) { std::vector in(32, 0); while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - CSHA256().Write(in.data(), in.size()).Finalize(&in[0]); - } + CSHA256().Write(in.data(), in.size()).Finalize(in.data()); } } static void SHA512(benchmark::State &state) 
{ uint8_t hash[CSHA512::OUTPUT_SIZE]; std::vector in(BUFFER_SIZE, 0); while (state.KeepRunning()) CSHA512().Write(in.data(), in.size()).Finalize(hash); } static void SipHash_32b(benchmark::State &state) { uint256 x; + uint64_t k1 = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - *((uint64_t *)x.begin()) = SipHashUint256(0, i, x); - } + *((uint64_t *)x.begin()) = SipHashUint256(0, ++k1, x); } } static void FastRandom_32bit(benchmark::State &state) { FastRandomContext rng(true); uint32_t x = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - x += rng.rand32(); - } + x += rng.rand32(); } } static void FastRandom_1bit(benchmark::State &state) { FastRandomContext rng(true); uint32_t x = 0; while (state.KeepRunning()) { - for (int i = 0; i < 1000000; i++) { - x += rng.randbool(); - } + x += rng.randbool(); } } -BENCHMARK(RIPEMD160); -BENCHMARK(SHA1); -BENCHMARK(SHA256); -BENCHMARK(SHA512); +BENCHMARK(RIPEMD160, 440); +BENCHMARK(SHA1, 570); +BENCHMARK(SHA256, 340); +BENCHMARK(SHA512, 330); -BENCHMARK(SHA256_32b); -BENCHMARK(SipHash_32b); -BENCHMARK(FastRandom_32bit); -BENCHMARK(FastRandom_1bit); +BENCHMARK(SHA256_32b, 4700 * 1000); +BENCHMARK(SipHash_32b, 40 * 1000 * 1000); +BENCHMARK(FastRandom_32bit, 110 * 1000 * 1000); +BENCHMARK(FastRandom_1bit, 440 * 1000 * 1000); diff --git a/src/bench/lockedpool.cpp b/src/bench/lockedpool.cpp index 8cf4cfc57..60431e131 100644 --- a/src/bench/lockedpool.cpp +++ b/src/bench/lockedpool.cpp @@ -1,44 +1,44 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "bench.h" #include "support/lockedpool.h" #include #include #define ASIZE 2048 #define BITER 5000 #define MSIZE 2048 static void BenchLockedPool(benchmark::State &state) { void *synth_base = reinterpret_cast(0x08000000); const size_t synth_size = 1024 * 1024; Arena b(synth_base, synth_size, 16); std::vector addr; for (int x = 0; x < ASIZE; ++x) - addr.push_back(0); + addr.push_back(nullptr); uint32_t s = 0x12345678; while (state.KeepRunning()) { for (int x = 0; x < BITER; ++x) { int idx = s & (addr.size() - 1); if (s & 0x80000000) { b.free(addr[idx]); - addr[idx] = 0; + addr[idx] = nullptr; } else if (!addr[idx]) { addr[idx] = b.alloc((s >> 16) & (MSIZE - 1)); } bool lsb = s & 1; s >>= 1; if (lsb) s ^= 0xf00f00f0; // LFSR period 0xf7ffffe0 } } for (void *ptr : addr) b.free(ptr); addr.clear(); } -BENCHMARK(BenchLockedPool); +BENCHMARK(BenchLockedPool, 530); diff --git a/src/bench/mempool_eviction.cpp b/src/bench/mempool_eviction.cpp index c4358d98b..9e38e778e 100644 --- a/src/bench/mempool_eviction.cpp +++ b/src/bench/mempool_eviction.cpp @@ -1,123 +1,123 @@ // Copyright (c) 2011-2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include "bench.h" #include "policy/policy.h" #include "txmempool.h" #include #include static void AddTx(const CTransaction &tx, const Amount &nFee, CTxMemPool &pool) { int64_t nTime = 0; double dPriority = 10.0; unsigned int nHeight = 1; bool spendsCoinbase = false; unsigned int sigOpCost = 4; LockPoints lp; pool.addUnchecked(tx.GetId(), CTxMemPoolEntry(MakeTransactionRef(tx), nFee, nTime, dPriority, nHeight, tx.GetValueOut(), spendsCoinbase, sigOpCost, lp)); } // Right now this is only testing eviction performance in an extremely small // mempool. Code needs to be written to generate a much wider variety of // unique transactions for a more meaningful performance measurement. 
static void MempoolEviction(benchmark::State &state) { CMutableTransaction tx1 = CMutableTransaction(); tx1.vin.resize(1); tx1.vin[0].scriptSig = CScript() << OP_1; tx1.vout.resize(1); tx1.vout[0].scriptPubKey = CScript() << OP_1 << OP_EQUAL; tx1.vout[0].nValue = 10 * COIN; CMutableTransaction tx2 = CMutableTransaction(); tx2.vin.resize(1); tx2.vin[0].scriptSig = CScript() << OP_2; tx2.vout.resize(1); tx2.vout[0].scriptPubKey = CScript() << OP_2 << OP_EQUAL; tx2.vout[0].nValue = 10 * COIN; CMutableTransaction tx3 = CMutableTransaction(); tx3.vin.resize(1); tx3.vin[0].prevout = COutPoint(tx2.GetId(), 0); tx3.vin[0].scriptSig = CScript() << OP_2; tx3.vout.resize(1); tx3.vout[0].scriptPubKey = CScript() << OP_3 << OP_EQUAL; tx3.vout[0].nValue = 10 * COIN; CMutableTransaction tx4 = CMutableTransaction(); tx4.vin.resize(2); tx4.vin[0].prevout = COutPoint(); tx4.vin[0].scriptSig = CScript() << OP_4; tx4.vin[1].prevout = COutPoint(); tx4.vin[1].scriptSig = CScript() << OP_4; tx4.vout.resize(2); tx4.vout[0].scriptPubKey = CScript() << OP_4 << OP_EQUAL; tx4.vout[0].nValue = 10 * COIN; tx4.vout[1].scriptPubKey = CScript() << OP_4 << OP_EQUAL; tx4.vout[1].nValue = 10 * COIN; CMutableTransaction tx5 = CMutableTransaction(); tx5.vin.resize(2); tx5.vin[0].prevout = COutPoint(tx4.GetId(), 0); tx5.vin[0].scriptSig = CScript() << OP_4; tx5.vin[1].prevout = COutPoint(); tx5.vin[1].scriptSig = CScript() << OP_5; tx5.vout.resize(2); tx5.vout[0].scriptPubKey = CScript() << OP_5 << OP_EQUAL; tx5.vout[0].nValue = 10 * COIN; tx5.vout[1].scriptPubKey = CScript() << OP_5 << OP_EQUAL; tx5.vout[1].nValue = 10 * COIN; CMutableTransaction tx6 = CMutableTransaction(); tx6.vin.resize(2); tx6.vin[0].prevout = COutPoint(tx4.GetId(), 1); tx6.vin[0].scriptSig = CScript() << OP_4; tx6.vin[1].prevout = COutPoint(); tx6.vin[1].scriptSig = CScript() << OP_6; tx6.vout.resize(2); tx6.vout[0].scriptPubKey = CScript() << OP_6 << OP_EQUAL; tx6.vout[0].nValue = 10 * COIN; tx6.vout[1].scriptPubKey = CScript() 
<< OP_6 << OP_EQUAL; tx6.vout[1].nValue = 10 * COIN; CMutableTransaction tx7 = CMutableTransaction(); tx7.vin.resize(2); tx7.vin[0].prevout = COutPoint(tx5.GetId(), 0); tx7.vin[0].scriptSig = CScript() << OP_5; tx7.vin[1].prevout = COutPoint(tx6.GetId(), 0); tx7.vin[1].scriptSig = CScript() << OP_6; tx7.vout.resize(2); tx7.vout[0].scriptPubKey = CScript() << OP_7 << OP_EQUAL; tx7.vout[0].nValue = 10 * COIN; tx7.vout[1].scriptPubKey = CScript() << OP_7 << OP_EQUAL; tx7.vout[1].nValue = 10 * COIN; CTxMemPool pool; CTransaction t1(tx1); CTransaction t2(tx2); CTransaction t3(tx3); CTransaction t4(tx4); CTransaction t5(tx5); CTransaction t6(tx6); CTransaction t7(tx1); while (state.KeepRunning()) { AddTx(t1, 10000 * SATOSHI, pool); AddTx(t2, 5000 * SATOSHI, pool); AddTx(t3, 20000 * SATOSHI, pool); AddTx(t4, 7000 * SATOSHI, pool); AddTx(t5, 1000 * SATOSHI, pool); AddTx(t6, 1100 * SATOSHI, pool); AddTx(t7, 9000 * SATOSHI, pool); pool.TrimToSize(pool.DynamicMemoryUsage() * 3 / 4); pool.TrimToSize(t1.GetTotalSize()); } } -BENCHMARK(MempoolEviction); +BENCHMARK(MempoolEviction, 41000); diff --git a/src/bench/prevector_destructor.cpp b/src/bench/prevector_destructor.cpp index 2cd4f4ca9..b3c241e14 100644 --- a/src/bench/prevector_destructor.cpp +++ b/src/bench/prevector_destructor.cpp @@ -1,33 +1,33 @@ // Copyright (c) 2015-2017 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. 
#include "bench.h" #include "prevector.h" static void PrevectorDestructor(benchmark::State &state) { while (state.KeepRunning()) { for (auto x = 0; x < 1000; ++x) { prevector<28, uint8_t> t0; prevector<28, uint8_t> t1; t0.resize(28); t1.resize(29); } } } static void PrevectorClear(benchmark::State &state) { while (state.KeepRunning()) { for (auto x = 0; x < 1000; ++x) { prevector<28, uint8_t> t0; prevector<28, uint8_t> t1; t0.resize(28); t0.clear(); t1.resize(29); t1.clear(); } } } -BENCHMARK(PrevectorDestructor); -BENCHMARK(PrevectorClear); +BENCHMARK(PrevectorDestructor, 5700); +BENCHMARK(PrevectorClear, 5600); diff --git a/src/bench/rollingbloom.cpp b/src/bench/rollingbloom.cpp index b269038ef..228ec574e 100644 --- a/src/bench/rollingbloom.cpp +++ b/src/bench/rollingbloom.cpp @@ -1,44 +1,31 @@ // Copyright (c) 2016 The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include #include "bench.h" #include "bloom.h" static void RollingBloom(benchmark::State &state) { CRollingBloomFilter filter(120000, 0.000001); std::vector data(32); uint32_t count = 0; - uint32_t nEntriesPerGeneration = (120000 + 1) / 2; - uint32_t countnow = 0; uint64_t match = 0; while (state.KeepRunning()) { count++; data[0] = count; data[1] = count >> 8; data[2] = count >> 16; data[3] = count >> 24; - if (countnow == nEntriesPerGeneration) { - auto b = benchmark::clock::now(); - filter.insert(data); - auto total = std::chrono::duration_cast( - benchmark::clock::now() - b) - .count(); - std::cout << "RollingBloom-refresh,1," << total << "," << total - << "," << total << "\n"; - countnow = 0; - } else { - filter.insert(data); - } - countnow++; + filter.insert(data); + data[0] = count >> 24; data[1] = count >> 16; data[2] = count >> 8; data[3] = count; match += filter.contains(data); } } -BENCHMARK(RollingBloom); +BENCHMARK(RollingBloom, 1500 * 1000);