diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -244,6 +244,7 @@ avalanche.cpp bloom.cpp blockencodings.cpp + blockfilter.cpp chain.cpp checkpoints.cpp config.cpp diff --git a/src/Makefile.am b/src/Makefile.am --- a/src/Makefile.am +++ b/src/Makefile.am @@ -107,6 +107,7 @@ bloom.h \ blockencodings.h \ blockfileinfo.h \ + blockfilter.h \ blockindexworkcomparator.h \ blockstatus.h \ blockvalidity.h \ @@ -236,6 +237,7 @@ avalanche.cpp \ bloom.cpp \ blockencodings.cpp \ + blockfilter.cpp \ chain.cpp \ checkpoints.cpp \ config.cpp \ diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -21,6 +21,7 @@ bench/rollingbloom.cpp \ bench/crypto_hash.cpp \ bench/ccoins_caching.cpp \ + bench/gcs_filter.cpp \ bench/merkle_root.cpp \ bench/mempool_eviction.cpp \ bench/base58.cpp \ diff --git a/src/Makefile.test.include b/src/Makefile.test.include --- a/src/Makefile.test.include +++ b/src/Makefile.test.include @@ -38,6 +38,7 @@ test/bip32_tests.cpp \ test/blockcheck_tests.cpp \ test/blockencodings_tests.cpp \ + test/blockfilter_tests.cpp \ test/blockindex_tests.cpp \ test/blockstatus_tests.cpp \ test/bloom_tests.cpp \ diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt --- a/src/bench/CMakeLists.txt +++ b/src/bench/CMakeLists.txt @@ -14,6 +14,7 @@ # checkblock.cpp TODO Fix including bench/data/*.raw files checkqueue.cpp crypto_hash.cpp + gcs_filter.cpp Examples.cpp lockedpool.cpp mempool_eviction.cpp diff --git a/src/bench/gcs_filter.cpp b/src/bench/gcs_filter.cpp new file mode 100644 --- /dev/null +++ b/src/bench/gcs_filter.cpp @@ -0,0 +1,41 @@ +// Copyright (c) 2018 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+ +#include +#include + +static void ConstructGCSFilter(benchmark::State &state) { /* Benchmarks constructing a filter from a set filled by 10000 insertions of length-32 elements. */ + GCSFilter::ElementSet elements; + for (int i = 0; i < 10000; ++i) { + GCSFilter::Element element(32); + element[0] = static_cast(i); + element[1] = static_cast(i >> 8); + elements.insert(std::move(element)); + } + + uint64_t siphash_k0 = 0; + while (state.KeepRunning()) { + GCSFilter filter(siphash_k0, 0, 20, 1 << 20, elements); /* P = 20, M = 2^20 */ + + siphash_k0++; /* use a different first SipHash key on every iteration */ + } +} + +static void MatchGCSFilter(benchmark::State &state) { /* Benchmarks a single-element Match against a filter that is built once, before the KeepRunning loop. */ + GCSFilter::ElementSet elements; + for (int i = 0; i < 10000; ++i) { + GCSFilter::Element element(32); + element[0] = static_cast(i); + element[1] = static_cast(i >> 8); + elements.insert(std::move(element)); + } + GCSFilter filter(0, 0, 20, 1 << 20, elements); /* P = 20, M = 2^20 */ + + while (state.KeepRunning()) { + filter.Match(GCSFilter::Element()); /* the query is an empty element */ + } +} + +BENCHMARK(ConstructGCSFilter, 1000); +BENCHMARK(MatchGCSFilter, 50 * 1000); diff --git a/src/blockfilter.h b/src/blockfilter.h new file mode 100644 --- /dev/null +++ b/src/blockfilter.h @@ -0,0 +1,74 @@ +// Copyright (c) 2018 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_BLOCKFILTER_H +#define BITCOIN_BLOCKFILTER_H + +#include +#include + +#include +#include +#include + +/** + * This implements a Golomb-coded set as defined in BIP 158. It is a + * compact, probabilistic data structure for testing set membership. + */ +class GCSFilter { +public: + typedef std::vector Element; + typedef std::set ElementSet; + +private: + uint64_t m_siphash_k0; + uint64_t m_siphash_k1; + uint8_t m_P; //!< Golomb-Rice coding parameter + uint32_t m_M; //!< Inverse false positive rate + uint32_t m_N; //!< Number of elements in the filter + uint64_t m_F; //!< Range of element hashes, F = N * M + std::vector m_encoded; /* Serialized form: CompactSize N, then the Golomb-Rice coded deltas between the sorted hashes */ + + /** Hash a data element to an integer in the range [0, N * M). 
*/ + uint64_t HashToRange(const Element &element) const; + + std::vector BuildHashedSet(const ElementSet &elements) const; /* Hashes every element with HashToRange and returns the hashes sorted in ascending order. */ + + /** Helper method used to implement Match and MatchAny. It walks the decoded filter values and the query hashes in a single forward pass, so the query hashes must be sorted in ascending order. */ + bool MatchInternal(const uint64_t *sorted_element_hashes, + size_t size) const; + +public: + /** Constructs an empty filter. NOTE(review): m_encoded is left empty here, yet MatchInternal starts by reading a CompactSize N from it; confirm that Match and MatchAny are safe to call on a filter built this way. */ + GCSFilter(uint64_t siphash_k0 = 0, uint64_t siphash_k1 = 0, uint8_t P = 0, + uint32_t M = 0); + + /** Reconstructs an already-created filter from an encoding. Throws std::ios_base::failure if the encoded N is 2^32 or larger, or if data remains after N elements have been decoded. */ + GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, + std::vector encoded_filter); + + /** Builds a new filter from the params and set of elements. Throws std::invalid_argument if the number of elements is 2^32 or larger. */ + GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, + const ElementSet &elements); + + uint8_t GetP() const { return m_P; } + uint32_t GetN() const { return m_N; } + uint32_t GetM() const { return m_M; } + const std::vector &GetEncoded() const { return m_encoded; } + + /** + * Checks if the element may be in the set. False positives are possible + * with probability 1/M. + */ + bool Match(const Element &element) const; + + /** + * Checks if any of the given elements may be in the set. False positives + * are possible with probability 1/M per element checked. This is more + * efficient than checking Match on multiple elements separately. + */ + bool MatchAny(const ElementSet &elements) const; +}; + +#endif // BITCOIN_BLOCKFILTER_H diff --git a/src/blockfilter.cpp b/src/blockfilter.cpp new file mode 100644 --- /dev/null +++ b/src/blockfilter.cpp @@ -0,0 +1,198 @@ +// Copyright (c) 2018 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +/// SerType used to serialize parameters in GCS filter encoding. +static constexpr int GCS_SER_TYPE = SER_NETWORK; + +/// Protocol version used to serialize parameters in GCS filter encoding. 
+static constexpr int GCS_SER_VERSION = 0; + +template +static void GolombRiceEncode(BitStreamWriter &bitwriter, uint8_t P, + uint64_t x) { + // Write quotient as unary-encoded: q 1's followed by one 0. + uint64_t q = x >> P; + while (q > 0) { /* emit the 1-bits in chunks of at most 64 per Write call */ + int nbits = q <= 64 ? static_cast(q) : 64; + bitwriter.Write(~0ULL, nbits); + q -= nbits; + } + bitwriter.Write(0, 1); /* the single 0 bit that terminates the unary run */ + + // Write the remainder in P bits. Since the remainder is just the bottom + // P bits of x, there is no need to mask first. + bitwriter.Write(x, P); +} + +template +static uint64_t GolombRiceDecode(BitStreamReader &bitreader, + uint8_t P) { + // Read unary-encoded quotient: q 1's followed by one 0. + uint64_t q = 0; + while (bitreader.Read(1) == 1) { + ++q; + } + + uint64_t r = bitreader.Read(P); /* remainder: the P bits that follow the terminating 0 */ + + return (q << P) + r; /* inverse of the split done by GolombRiceEncode */ +} + +// Map a value x that is uniformly distributed in the range [0, 2^64) to a +// value uniformly distributed in [0, n) by returning the upper 64 bits of +// x * n. +// +// See: +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ +static uint64_t MapIntoRange(uint64_t x, uint64_t n) { +#ifdef __SIZEOF_INT128__ + return (static_cast(x) * + static_cast(n)) >> + 64; +#else + // To perform the calculation on 64-bit numbers without losing the + // result to overflow, split the numbers into the most significant and + // least significant 32 bits and perform multiplication piece-wise. 
+ // + // See: https://stackoverflow.com/a/26855440 + uint64_t x_hi = x >> 32; + uint64_t x_lo = x & 0xFFFFFFFF; + uint64_t n_hi = n >> 32; + uint64_t n_lo = n & 0xFFFFFFFF; + + uint64_t ac = x_hi * n_hi; + uint64_t ad = x_hi * n_lo; + uint64_t bc = x_lo * n_hi; + uint64_t bd = x_lo * n_lo; + + uint64_t mid34 = (bd >> 32) + (bc & 0xFFFFFFFF) + (ad & 0xFFFFFFFF); /* sum of the three partial terms that sit at bit 32; whatever exceeds 32 bits is the carry into the upper half */ + uint64_t upper64 = ac + (bc >> 32) + (ad >> 32) + (mid34 >> 32); + return upper64; +#endif +} + +uint64_t GCSFilter::HashToRange(const Element &element) const { + uint64_t hash = CSipHasher(m_siphash_k0, m_siphash_k1) + .Write(element.data(), element.size()) + .Finalize(); + return MapIntoRange(hash, m_F); /* m_F is N * M, set by the constructors */ +} + +std::vector +GCSFilter::BuildHashedSet(const ElementSet &elements) const { + std::vector hashed_elements; + hashed_elements.reserve(elements.size()); + for (const Element &element : elements) { + hashed_elements.push_back(HashToRange(element)); + } + std::sort(hashed_elements.begin(), hashed_elements.end()); /* ascending order is what the delta encoding and MatchInternal rely on */ + return hashed_elements; +} + +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, + uint32_t M) + : m_siphash_k0(siphash_k0), m_siphash_k1(siphash_k1), m_P(P), m_M(M), + m_N(0), m_F(0) {} /* NOTE(review): m_encoded stays empty here, while MatchInternal first reads a CompactSize N from m_encoded; confirm Match and MatchAny are safe on a filter built only through this constructor */ + +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, + uint32_t M, std::vector encoded_filter) + : GCSFilter(siphash_k0, siphash_k1, P, M) { + m_encoded = std::move(encoded_filter); + + VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); + + uint64_t N = ReadCompactSize(stream); + m_N = static_cast(N); + if (m_N != N) { + throw std::ios_base::failure("N must be <2^32"); + } + m_F = static_cast(m_N) * static_cast(m_M); + + // Verify that the encoded filter contains exactly N elements. If it has too + // much or too little data, a std::ios_base::failure exception will be + // raised. 
+ BitStreamReader bitreader(stream); + for (uint64_t i = 0; i < m_N; ++i) { + GolombRiceDecode(bitreader, m_P); /* decoded value is discarded; this pass only consumes the stream */ + } + if (!stream.empty()) { + throw std::ios_base::failure("encoded_filter contains excess data"); + } +} + +GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, + uint32_t M, const ElementSet &elements) + : GCSFilter(siphash_k0, siphash_k1, P, M) { + size_t N = elements.size(); + m_N = static_cast(N); + if (m_N != N) { + throw std::invalid_argument("N must be <2^32"); + } + m_F = static_cast(m_N) * static_cast(m_M); + + CVectorWriter stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); + + WriteCompactSize(stream, m_N); + + if (elements.empty()) { + return; /* an empty set is encoded as the CompactSize N alone */ + } + + BitStreamWriter bitwriter(stream); + + uint64_t last_value = 0; + for (uint64_t value : BuildHashedSet(elements)) { + uint64_t delta = value - last_value; /* hashes arrive sorted, so only the gap to the previous one is encoded */ + GolombRiceEncode(bitwriter, m_P, delta); + last_value = value; + } + + bitwriter.Flush(); +} + +bool GCSFilter::MatchInternal(const uint64_t *element_hashes, + size_t size) const { + VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); + + // Seek forward by size of N (the CompactSize that prefixes the encoding) + uint64_t N = ReadCompactSize(stream); + assert(N == m_N); + + BitStreamReader bitreader(stream); + + uint64_t value = 0; + size_t hashes_index = 0; + for (uint32_t i = 0; i < m_N; ++i) { + uint64_t delta = GolombRiceDecode(bitreader, m_P); + value += delta; /* the running sum of deltas rebuilds the stored hash */ + + while (true) { + if (hashes_index == size) { + return false; /* every query hash has been passed without a hit */ + } else if (element_hashes[hashes_index] == value) { + return true; + } else if (element_hashes[hashes_index] > value) { + break; /* this query may still equal a later stored value; decode the next one */ + } + + hashes_index++; /* this query hash is below the current stored value, move to the next query */ + } + } + + return false; +} + +bool GCSFilter::Match(const Element &element) const { + uint64_t query = HashToRange(element); + return MatchInternal(&query, 1); +} + +bool GCSFilter::MatchAny(const ElementSet &elements) const { + const std::vector queries = BuildHashedSet(elements); + return MatchInternal(queries.data(), queries.size()); +} diff --git 
a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -56,6 +56,7 @@ bip32_tests.cpp blockcheck_tests.cpp blockencodings_tests.cpp + blockfilter_tests.cpp blockindex_tests.cpp blockstatus_tests.cpp bloom_tests.cpp diff --git a/src/test/blockfilter_tests.cpp b/src/test/blockfilter_tests.cpp new file mode 100644 --- /dev/null +++ b/src/test/blockfilter_tests.cpp @@ -0,0 +1,33 @@ +// Copyright (c) 2018 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include + +BOOST_AUTO_TEST_SUITE(blockfilter_tests) + +BOOST_AUTO_TEST_CASE(gcsfilter_test) { /* Every element the filter was built from must match, both on its own via Match and via MatchAny when mixed into the second set. */ + GCSFilter::ElementSet included_elements, excluded_elements; + for (int i = 0; i < 100; ++i) { + GCSFilter::Element element1(32); + element1[0] = i; + included_elements.insert(std::move(element1)); + + GCSFilter::Element element2(32); + element2[1] = i; /* NOTE(review): for i == 0 element1 and element2 are both all-zero, so that element ends up in both sets; confirm this overlap is intended */ + excluded_elements.insert(std::move(element2)); + } + + GCSFilter filter(0, 0, 10, 1 << 10, included_elements); /* P = 10, M = 2^10 */ + for (const auto &element : included_elements) { + BOOST_CHECK(filter.Match(element)); + + auto insertion = excluded_elements.insert(element); + BOOST_CHECK(filter.MatchAny(excluded_elements)); + excluded_elements.erase(insertion.first); /* removes the entry insert pointed at, whether or not it was newly added */ + } +} + +BOOST_AUTO_TEST_SUITE_END()