diff --git a/src/blockencodings.h b/src/blockencodings.h --- a/src/blockencodings.h +++ b/src/blockencodings.h @@ -7,6 +7,11 @@ #include #include +#include + +#include +#include +#include class Config; class CTxMemPool; @@ -54,6 +59,13 @@ template void UnserData(Stream &s) { s >> tx; } }; +struct ShortIdProcessorPrefilledTransactionAdapter { + uint32_t getIndex(const PrefilledTransaction &pt) const { return pt.index; } + CTransactionRef getItem(const PrefilledTransaction &pt) const { + return pt.tx; + } +}; + typedef enum ReadStatus_t { READ_STATUS_OK, // Invalid object, peer is sending bogus crap. @@ -127,8 +139,24 @@ }; class PartiallyDownloadedBlock { + struct CTransactionRefCompare { + bool operator()(const CTransactionRef &lhs, + const CTransactionRef &rhs) const { + return lhs->GetHash() == rhs->GetHash(); + } + }; + + using TransactionShortIdProcessor = + ShortIdProcessor; + + // FIXME This better fits a unique_ptr, but the unit tests needs a copy + // operator for this class. It can be trivially changed when the unit tests + // are refactored. + std::shared_ptr shortidProcessor; + protected: - std::vector txns_available; size_t prefilled_count = 0, mempool_count = 0, extra_count = 0; const CTxMemPool *pool; const Config *config; diff --git a/src/blockencodings.cpp b/src/blockencodings.cpp --- a/src/blockencodings.cpp +++ b/src/blockencodings.cpp @@ -60,86 +60,51 @@ return READ_STATUS_INVALID; } - assert(header.IsNull() && txns_available.empty()); + assert(header.IsNull()); + assert(shortidProcessor == nullptr); header = cmpctblock.header; - txns_available.resize(cmpctblock.BlockTxCount()); for (const auto &prefilledtxn : cmpctblock.prefilledtxn) { if (prefilledtxn.tx->IsNull()) { return READ_STATUS_INVALID; } - - txns_available[prefilledtxn.index] = prefilledtxn.tx; } - prefilled_count = cmpctblock.prefilledtxn.size(); - // Calculate map of txids -> positions and check mempool to see what we have - // (or don't). Because well-formed cmpctblock messages will have a - // (relatively) uniform distribution of short IDs, any highly-uneven - // distribution of elements can be safely treated as a READ_STATUS_FAILED. - std::unordered_map shorttxids( - cmpctblock.shorttxids.size()); - uint32_t index_offset = 0; - for (size_t i = 0; i < cmpctblock.shorttxids.size(); i++) { - while (txns_available[i + index_offset]) { - index_offset++; - } - - shorttxids[cmpctblock.shorttxids[i]] = i + index_offset; - // To determine the chance that the number of entries in a bucket - // exceeds N, we use the fact that the number of elements in a single - // bucket is binomially distributed (with n = the number of shorttxids - // S, and p = 1 / the number of buckets), that in the worst case the - // number of buckets is equal to S (due to std::unordered_map having a - // default load factor of 1.0), and that the chance for any bucket to - // exceed N elements is at most buckets * (the chance that any given - // bucket is above N elements). Thus: P(max_elements_per_bucket > N) <= - // S * (1 - cdf(binomial(n=S,p=1/S), N)). If we assume blocks of up to - // 16000, allowing 12 elements per bucket should only fail once per ~1 - // million block transfers (per peer and connection). - if (shorttxids.bucket_size( - shorttxids.bucket(cmpctblock.shorttxids[i])) > 12) { - return READ_STATUS_FAILED; - } - } - - // TODO: in the shortid-collision case, we should instead request both - // transactions which collided. Falling back to full-block-request here is - // overkill. - if (shorttxids.size() != cmpctblock.shorttxids.size()) { - // Short ID collision + // To determine the chance that the number of entries in a bucket exceeds N, + // we use the fact that the number of elements in a single bucket is + // binomially distributed (with n = the number of shorttxids S, and + // p = 1 / the number of buckets), that in the worst case the number of + // buckets is equal to S (due to std::unordered_map having a default load + // factor of 1.0), and that the chance for any bucket to exceed N elements + // is at most buckets * (the chance that any given bucket is above N + // elements). Thus: + // P(max_elements_per_bucket > N) <= S * (1 - cdf(binomial(n=S,p=1/S), N)) + // If we assume blocks of up to 16000, allowing 12 elements per bucket + // should only fail once per ~1 million block transfers (per peer and + // connection). + // FIXME the value of 16000 txs in a block should be re-evaluated. + shortidProcessor = std::make_shared( + cmpctblock.prefilledtxn, cmpctblock.shorttxids, 12); + + if (!shortidProcessor->isEvenlyDistributed() || + shortidProcessor->hasShortIdCollision() || + shortidProcessor->hasOutOfBoundIndex()) { + // TODO: in the shortid-collision case, we should instead request both + // transactions which collided. Falling back to full-block-request here + // is overkill. return READ_STATUS_FAILED; } - std::vector have_txn(txns_available.size()); { LOCK(pool->cs); for (size_t i = 0; i < pool->vTxHashes.size(); i++) { uint64_t shortid = cmpctblock.GetShortID(pool->vTxHashes[i].first); - std::unordered_map::iterator idit = - shorttxids.find(shortid); - if (idit != shorttxids.end()) { - if (!have_txn[idit->second]) { - txns_available[idit->second] = - pool->vTxHashes[i].second->GetSharedTx(); - have_txn[idit->second] = true; - mempool_count++; - } else { - // If we find two mempool txn that match the short id, just - // request it. This should be rare enough that the extra - // bandwidth doesn't matter, but eating a round-trip due to - // FillBlock failure would be annoying. - if (txns_available[idit->second]) { - txns_available[idit->second].reset(); - mempool_count--; - } - } - } - // Though ideally we'd continue scanning for the - // two-txn-match-shortid case, the performance win of an early exit - // here is too good to pass up and worth the extra risk. - if (mempool_count == shorttxids.size()) { + + mempool_count += shortidProcessor->matchKnownItem( + shortid, pool->vTxHashes[i].second->GetSharedTx()); + + if (mempool_count == shortidProcessor->getShortIdCount()) { break; } } @@ -147,35 +112,12 @@ for (auto &extra_txn : extra_txns) { uint64_t shortid = cmpctblock.GetShortID(extra_txn.first); - std::unordered_map::iterator idit = - shorttxids.find(shortid); - if (idit != shorttxids.end()) { - if (!have_txn[idit->second]) { - txns_available[idit->second] = extra_txn.second; - have_txn[idit->second] = true; - mempool_count++; - extra_count++; - } else { - // If we find two mempool/extra txn that match the short id, - // just request it. This should be rare enough that the extra - // bandwidth doesn't matter, but eating a round-trip due to - // FillBlock failure would be annoying. Note that we don't want - // duplication between extra_txns and mempool to trigger this - // case, so we compare hashes first. - if (txns_available[idit->second] && - txns_available[idit->second]->GetHash() != - extra_txn.second->GetHash()) { - txns_available[idit->second].reset(); - mempool_count--; - extra_count--; - } - } - } - // Though ideally we'd continue scanning for the two-txn-match-shortid - // case, the performance win of an early exit here is too good to pass - // up and worth the extra risk. - if (mempool_count == shorttxids.size()) { + int count = shortidProcessor->matchKnownItem(shortid, extra_txn.second); + mempool_count += count; + extra_count += count; + + if (mempool_count == shortidProcessor->getShortIdCount()) { break; } } @@ -191,8 +133,8 @@ bool PartiallyDownloadedBlock::IsTxAvailable(size_t index) const { assert(!header.IsNull()); - assert(index < txns_available.size()); - return txns_available[index] != nullptr; + assert(shortidProcessor != nullptr); + return shortidProcessor->getItem(index) != nullptr; } ReadStatus PartiallyDownloadedBlock::FillBlock( @@ -200,11 +142,12 @@ assert(!header.IsNull()); uint256 hash = header.GetHash(); block = header; - block.vtx.resize(txns_available.size()); + const size_t txnCount = shortidProcessor->getItemCount(); + block.vtx.resize(txnCount); size_t tx_missing_offset = 0; - for (size_t i = 0; i < txns_available.size(); i++) { - auto &txn_available = txns_available[i]; + for (size_t i = 0; i < txnCount; i++) { + auto &txn_available = shortidProcessor->getItem(i); if (!txn_available) { if (vtx_missing.size() <= tx_missing_offset) { return READ_STATUS_INVALID; @@ -217,7 +160,7 @@ // Make sure we can't call FillBlock again. header.SetNull(); - txns_available.clear(); + shortidProcessor.reset(); if (vtx_missing.size() != tx_missing_offset) { return READ_STATUS_INVALID; diff --git a/src/shortidprocessor.h b/src/shortidprocessor.h new file mode 100644 --- /dev/null +++ b/src/shortidprocessor.h @@ -0,0 +1,125 @@ +// Copyright (c) 2022 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_SHORTIDPROCESSOR_H +#define BITCOIN_SHORTIDPROCESSOR_H + +#include +#include +#include +#include +#include +#include +#include + +template +class ShortIdProcessor : Adapter, ItemCompare { + using ItemType = typename std::remove_reference().getItem( + std::declval()))>::type; + + bool evenlyDistributed = true; + bool shortIdCollision = false; + bool outOfBoundIndex = false; + + std::vector itemsAvailable; + std::vector haveItem; + std::unordered_map shortIdIndexMap; + + int addItem(uint32_t index, const ItemType &item) { + if (index >= itemsAvailable.size()) { + outOfBoundIndex = true; + return 0; + } + + if (!haveItem[index]) { + haveItem[index] = true; + itemsAvailable[index] = item; + return 1; + } + + // If we find two items that collides for the same index, just request + // this index. This should be rare enough that the extra bandwidth + // doesn't matter. Due to the way the compact messages are encoded, this + // can only occur in the event of a shortid collision. + if (itemsAvailable[index] && !(*this)(itemsAvailable[index], item)) { + itemsAvailable[index] = ItemType(); + return -1; + } + + return 0; + } + +public: + ShortIdProcessor(const std::vector &prefilledItems, + const std::vector &shortids, + size_t maxShortIdPerBucket) + : itemsAvailable(prefilledItems.size() + shortids.size()), + haveItem(prefilledItems.size() + shortids.size()), + shortIdIndexMap(shortids.size()) { + for (const auto &prefilledItem : prefilledItems) { + addItem(Adapter::getIndex(prefilledItem), + Adapter::getItem(prefilledItem)); + } + + uint32_t index_offset = 0; + for (size_t i = 0; i < shortids.size(); i++) { + while (itemsAvailable[i + index_offset]) { + index_offset++; + } + + shortIdIndexMap[shortids[i]] = i + index_offset; + + // Because well-formed compact messages will have a (relatively) + // uniform distribution of short IDs, any highly-uneven distribution + // of elements can be safely treated as an error. + if (shortIdIndexMap.bucket_size(shortIdIndexMap.bucket( + shortids[i])) > maxShortIdPerBucket) { + evenlyDistributed = false; + } + } + + shortIdCollision = shortIdIndexMap.size() != shortids.size(); + } + + /** Status accessors */ + bool isEvenlyDistributed() const { return evenlyDistributed; } + bool hasShortIdCollision() const { return shortIdCollision; } + bool hasOutOfBoundIndex() const { return outOfBoundIndex; } + + /** Total item count */ + size_t getItemCount() const { return itemsAvailable.size(); } + /** Unique shortid count */ + size_t getShortIdCount() const { return shortIdIndexMap.size(); } + + /** + * Attempts to add a known item by matching its shortid with the supplied + * ones. The shortids must be processed prior from calling this method. + * + * @param[in] shortid The shortid of the item + * @param[in] item The item to be added, whose index is retrieved from + * its shortid + * + * @return int The number of added items: + * * 0 if it was not added (e.g. index out of bounds). + * * +1 if added successfully. + * * -1 if an existing item was removed due to collision. + */ + int matchKnownItem(uint64_t shortid, const ItemType &item) { + auto idit = shortIdIndexMap.find(shortid); + if (idit == shortIdIndexMap.end()) { + return 0; + } + + return addItem(idit->second, item); + } + + const ItemType &getItem(size_t index) const { + assert(index < itemsAvailable.size()); + + return itemsAvailable[index]; + } +}; + +#endif // BITCOIN_SHORTIDPROCESSOR_H diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -199,6 +199,7 @@ scriptnum_tests.cpp serialize_tests.cpp settings_tests.cpp + shortidprocessor_tests.cpp sigcache_tests.cpp sigencoding_tests.cpp sighash_tests.cpp diff --git a/src/test/shortidprocessor_tests.cpp b/src/test/shortidprocessor_tests.cpp new file mode 100644 --- /dev/null +++ b/src/test/shortidprocessor_tests.cpp @@ -0,0 +1,133 @@ +// Copyright (c) 2022 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include + +#include +#include + +BOOST_AUTO_TEST_SUITE(shortidprocessor_tests) + +namespace { +struct PrefilledTestItem { + uint32_t index; + std::shared_ptr item; + + PrefilledTestItem(uint32_t indexIn) + : index(indexIn), item(std::make_shared(indexIn)) {} +}; + +using ItemType = decltype(PrefilledTestItem::item); + +struct PrefilledTestItemAdapter { + uint32_t getIndex(const PrefilledTestItem &i) const { return i.index; } + ItemType getItem(const PrefilledTestItem &i) const { return i.item; } +}; + +struct TestItemCompare { + bool operator()(const ItemType &lhs, const ItemType &rhs) const { + return *lhs == *rhs; + } +}; +} // namespace + +BOOST_AUTO_TEST_CASE(processing_items) { + using TestItemShortIdProcessor = + ShortIdProcessor; + + { + TestItemShortIdProcessor p({}, {}, 0); + BOOST_CHECK_EQUAL(p.getItemCount(), 0); + BOOST_CHECK_EQUAL(p.getShortIdCount(), 0); + BOOST_CHECK(p.isEvenlyDistributed()); + BOOST_CHECK(!p.hasShortIdCollision()); + BOOST_CHECK(!p.hasOutOfBoundIndex()); + } + + { + // Trigger the bucket uneven distribution. + // 0 is not a realistic threshold value but is the only one which is + // guaranteed to always trigger the flag, because the number of buckets + // is set to the number of shortids. + TestItemShortIdProcessor p({}, {0}, 0); + BOOST_CHECK_EQUAL(p.getItemCount(), 1); + BOOST_CHECK_EQUAL(p.getShortIdCount(), 1); + BOOST_CHECK(!p.isEvenlyDistributed()); + BOOST_CHECK(!p.hasShortIdCollision()); + BOOST_CHECK(!p.hasOutOfBoundIndex()); + } + + { + // Trigger the short id collision + TestItemShortIdProcessor p({}, {0, 0}, 1); + BOOST_CHECK_EQUAL(p.getItemCount(), 2); + BOOST_CHECK_EQUAL(p.getShortIdCount(), 1); + BOOST_CHECK(p.isEvenlyDistributed()); + BOOST_CHECK(p.hasShortIdCollision()); + BOOST_CHECK(!p.hasOutOfBoundIndex()); + } + + // Trigger the out of bound index + auto checkOutOfBoundIndex = [&](const TestItemShortIdProcessor &p) { + BOOST_CHECK(p.isEvenlyDistributed()); + BOOST_CHECK(!p.hasShortIdCollision()); + BOOST_CHECK(p.hasOutOfBoundIndex()); + }; + checkOutOfBoundIndex(TestItemShortIdProcessor({1}, {}, 1)); + checkOutOfBoundIndex(TestItemShortIdProcessor({0, 1, 2, 3, 5}, {}, 1)); + checkOutOfBoundIndex(TestItemShortIdProcessor({4}, {0, 1, 2}, 3)); + checkOutOfBoundIndex(TestItemShortIdProcessor({0, 1, 6}, {0, 1, 2}, 3)); + + { + const std::vector prefilledItems{0, 1, 2, 5}; + const std::vector shortids{3, 4, 6, 7, 8, 9}; + TestItemShortIdProcessor p(prefilledItems, shortids, 10); + BOOST_CHECK_EQUAL(p.getItemCount(), 10); + BOOST_CHECK_EQUAL(p.getShortIdCount(), 6); + BOOST_CHECK(p.isEvenlyDistributed()); + BOOST_CHECK(!p.hasShortIdCollision()); + BOOST_CHECK(!p.hasOutOfBoundIndex()); + + BOOST_CHECK(p.getItem(0) == prefilledItems[0].item); + BOOST_CHECK(p.getItem(1) == prefilledItems[1].item); + BOOST_CHECK(p.getItem(2) == prefilledItems[2].item); + BOOST_CHECK(p.getItem(3) == nullptr); + BOOST_CHECK(p.getItem(4) == nullptr); + BOOST_CHECK(p.getItem(5) == prefilledItems[3].item); + BOOST_CHECK(p.getItem(6) == nullptr); + BOOST_CHECK(p.getItem(7) == nullptr); + BOOST_CHECK(p.getItem(8) == nullptr); + BOOST_CHECK(p.getItem(9) == nullptr); + + // Add a missing shortid + auto item3 = std::make_shared(3); + BOOST_CHECK_EQUAL(p.matchKnownItem(3, item3), 1); + BOOST_CHECK(p.getItem(3) == item3); + + // If the same item is added again, it has no effect + for (size_t i = 0; i < 10; i++) { + BOOST_CHECK_EQUAL( + p.matchKnownItem(3, std::make_shared(3)), 0); + BOOST_CHECK(p.getItem(3) == item3); + } + + // Shortid collision, the item is removed from the available list so it + // will be requested. + BOOST_CHECK_EQUAL(p.matchKnownItem(3, std::make_shared(30)), + -1); + BOOST_CHECK(p.getItem(3) == nullptr); + + // Now that collision occurred, adding other items as no effect + for (size_t i = 0; i < 10; i++) { + BOOST_CHECK_EQUAL( + p.matchKnownItem(3, std::make_shared(i)), 0); + BOOST_CHECK(p.getItem(3) == nullptr); + } + } +} + +BOOST_AUTO_TEST_SUITE_END()