diff --git a/src/init.cpp b/src/init.cpp --- a/src/init.cpp +++ b/src/init.cpp @@ -1817,7 +1817,7 @@ GetRand(std::numeric_limits::max()))); CConnman &connman = *g_connman; - peerLogic.reset(new PeerLogicValidation(&connman)); + peerLogic.reset(new PeerLogicValidation(&connman, scheduler)); RegisterValidationInterface(peerLogic.get()); if (gArgs.IsArgSet("-onlynet")) { diff --git a/src/net.h b/src/net.h --- a/src/net.h +++ b/src/net.h @@ -246,6 +246,19 @@ void GetBanned(banmap_t &banmap); void SetBanned(const banmap_t &banmap); + // This allows temporarily exceeding nMaxOutbound, with the goal of finding + // a peer that is better than all our current peers. + void SetTryNewOutboundPeer(bool flag); + bool GetTryNewOutboundPeer(); + + // Return the number of outbound peers we have in excess of our target (eg, + // if we previously called SetTryNewOutboundPeer(true), and have since set + // to false, we may have extra peers that we wish to disconnect). This may + // return a value less than (num_outbound_connections - num_outbound_slots) + // in cases where some outbound connections are not yet fully connected, or + // not yet fully disconnected. + int GetExtraOutboundCount(); + bool AddNode(const std::string &node); bool RemoveAddedNode(const std::string &node); std::vector GetAddedNodeInfo(); @@ -418,6 +431,15 @@ std::thread threadOpenAddedConnections; std::thread threadOpenConnections; std::thread threadMessageHandler; + + /** + * Flag for deciding to connect to an extra outbound peer, in excess of + * nMaxOutbound. + * This takes the place of a feeler connection. + */ + std::atomic_bool m_try_another_outbound_peer; + + friend struct CConnmanTest; }; extern std::unique_ptr g_connman; void Discover(); diff --git a/src/net.cpp b/src/net.cpp --- a/src/net.cpp +++ b/src/net.cpp @@ -1765,6 +1765,37 @@ } } +bool CConnman::GetTryNewOutboundPeer() { + return m_try_another_outbound_peer; +} + +void CConnman::SetTryNewOutboundPeer(bool flag) { + m_try_another_outbound_peer = flag; + LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", + flag ? "true" : "false"); +} + +// Return the number of peers we have over our outbound connection limit. +// Exclude peers that are marked for disconnect, or are going to be disconnected +// soon (eg one-shots and feelers). +// Also exclude peers that haven't finished initial connection handshake yet (so +// that we don't decide we're over our desired connection limit, and then evict +// some peer that has finished the handshake). +int CConnman::GetExtraOutboundCount() { + int nOutbound = 0; + { + LOCK(cs_vNodes); + for (CNode *pnode : vNodes) { + if (!pnode->fInbound && !pnode->m_manual_connection && + !pnode->fFeeler && !pnode->fDisconnect && !pnode->fOneShot && + pnode->fSuccessfullyConnected) { + ++nOutbound; + } + } + } + return std::max(nOutbound - nMaxOutbound, 0); +} + void CConnman::ThreadOpenConnections(const std::vector connect) { // Connect to specific addresses if (!connect.empty()) { @@ -1859,7 +1890,8 @@ // * Only make a feeler connection once every few minutes. // bool fFeeler = false; - if (nOutbound >= nMaxOutbound) { + + if (nOutbound >= nMaxOutbound && !GetTryNewOutboundPeer()) { // The current time right now (in microseconds). int64_t nTime = GetTimeMicros(); if (nTime > nNextFeeler) { @@ -2344,6 +2376,7 @@ semOutbound = nullptr; semAddnode = nullptr; flagInterruptMsgProc = false; + SetTryNewOutboundPeer(false); Options connOptions; Init(connOptions); diff --git a/src/net_processing.h b/src/net_processing.h --- a/src/net_processing.h +++ b/src/net_processing.h @@ -6,6 +6,7 @@ #ifndef BITCOIN_NET_PROCESSING_H #define BITCOIN_NET_PROCESSING_H +#include "consensus/params.h" #include "net.h" #include "validationinterface.h" @@ -44,14 +45,26 @@ */ // 20 minutes static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; +/** How frequently to check for stale tips, in seconds */ +// 10 minutes +static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; +/** + * How frequently to check for extra outbound peers and disconnect, in seconds. + */ +static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45; +/** + * Minimum time an outbound-peer-eviction candidate must be connected for, in + * order to evict, in seconds. + */ +static constexpr int64_t MINIMUM_CONNECT_TIME = 30; class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: - CConnman *connman; + CConnman *const connman; public: - explicit PeerLogicValidation(CConnman *connman); + explicit PeerLogicValidation(CConnman *connman, CScheduler &scheduler); void BlockConnected(const std::shared_ptr &pblock, @@ -84,6 +97,13 @@ std::atomic &interrupt) override; void ConsiderEviction(CNode *pto, int64_t time_in_seconds); + void + CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams); + void EvictExtraOutboundPeers(int64_t time_in_seconds); + +private: + //! Next time to check for stale tip + int64_t m_stale_tip_check_time; }; struct CNodeStateStats { diff --git a/src/net_processing.cpp b/src/net_processing.cpp --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -23,6 +23,7 @@ #include "primitives/block.h" #include "primitives/transaction.h" #include "random.h" +#include "scheduler.h" #include "tinyformat.h" #include "txmempool.h" #include "ui_interface.h" @@ -129,6 +130,9 @@ /** Number of outbound peers with m_chain_sync.m_protect. */ int g_outbound_peers_with_protect_from_disconnect = 0; +/** When our tip was last updated. */ +int64_t g_last_tip_update = 0; + /** Relay map, protected by cs_main. */ typedef std::map MapRelay; MapRelay mapRelay; @@ -242,6 +246,9 @@ ChainSyncTimeoutState m_chain_sync; + //! Time of last new block announcement + int64_t m_last_block_announcement; + CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) { fCurrentlyConnected = false; @@ -264,6 +271,7 @@ fProvidesHeaderAndIDs = false; fSupportsDesiredCmpctVersion = false; m_chain_sync = {0, nullptr, false, false}; + m_last_block_announcement = 0; } }; @@ -496,6 +504,16 @@ }); } +bool TipMayBeStale(const Consensus::Params &consensusParams) { + AssertLockHeld(cs_main); + if (g_last_tip_update == 0) { + g_last_tip_update = GetTime(); + } + return g_last_tip_update < + GetTime() - consensusParams.nPowTargetSpacing * 3 && + mapBlocksInFlight.empty(); +} + // Requires cs_main bool CanDirectFetch(const Consensus::Params &consensusParams) { return chainActive.Tip()->GetBlockTime() > @@ -622,6 +640,16 @@ } // namespace +// This function is used for testing the stale tip eviction logic, see +// DoS_tests.cpp +void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds) { + LOCK(cs_main); + CNodeState *state = State(node); + if (state) { + state->m_last_block_announcement = time_in_seconds; + } +} + // Returns true for outbound peers, excluding manual connections, feelers, and // one-shots. bool IsOutboundDisconnectionCandidate(const CNode *node) { @@ -876,10 +904,24 @@ // blockchain -> download logic notification // -PeerLogicValidation::PeerLogicValidation(CConnman *connmanIn) - : connman(connmanIn) { +PeerLogicValidation::PeerLogicValidation(CConnman *connmanIn, + CScheduler &scheduler) + : connman(connmanIn), m_stale_tip_check_time(0) { // Initialize global variables that cannot be constructed at startup. recentRejects.reset(new CRollingBloomFilter(120000, 0.000001)); + + const Consensus::Params &consensusParams = Params().GetConsensus(); + // Stale tip checking and peer eviction are on two different timers, but we + // don't want them to get out of sync due to drift in the scheduler, so we + // combine them in one function and schedule at the quicker (peer-eviction) + // timer. + static_assert( + EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, + "peer eviction timer should be less than stale tip check timer"); + scheduler.scheduleEvery( + std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, + consensusParams), + EXTRA_PEER_CHECK_INTERVAL * 1000); } void PeerLogicValidation::BlockConnected( @@ -918,6 +960,8 @@ "Erased %d orphan tx included or conflicted by block\n", nErased); } + + g_last_tip_update = GetTime(); } static CCriticalSection cs_most_recent_block; @@ -1405,6 +1449,7 @@ return true; } + bool received_new_header = false; const CBlockIndex *pindexLast = nullptr; { LOCK(cs_main); @@ -1456,6 +1501,12 @@ } hashLastBlock = header.GetHash(); } + + // If we don't have the last header, then they'll have given us + // something new (if these headers are valid). + if (mapBlockIndex.find(hashLastBlock) == mapBlockIndex.end()) { + received_new_header = true; + } } CValidationState state; @@ -1525,6 +1576,11 @@ // because it is set in UpdateBlockAvailability. Some nullptr checks are // still present, however, as belt-and-suspenders. + if (received_new_header && + pindexLast->nChainWork > chainActive.Tip()->nChainWork) { + nodestate->m_last_block_announcement = GetTime(); + } + if (nCount == MAX_HEADERS_RESULTS) { // Headers message had its maximum size; the peer may have more // headers. @@ -1626,6 +1682,7 @@ } } } + if (!pfrom->fDisconnect && IsOutboundDisconnectionCandidate(pfrom) && nodestate->pindexBestKnownBlock != nullptr) { // If this is an outbound peer, check to see if we should protect it @@ -1635,6 +1692,9 @@ nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + LogPrint(BCLog::NET, + "Protecting outbound peer=%d from eviction\n", + pfrom->GetId()); nodestate->m_chain_sync.m_protect = true; ++g_outbound_peers_with_protect_from_disconnect; } @@ -2519,6 +2579,8 @@ CBlockHeaderAndShortTxIDs cmpctblock; vRecv >> cmpctblock; + bool received_new_header = false; + { LOCK(cs_main); @@ -2535,6 +2597,11 @@ } return true; } + + if (mapBlockIndex.find(cmpctblock.header.GetHash()) == + mapBlockIndex.end()) { + received_new_header = true; + } } const CBlockIndex *pindex = nullptr; @@ -2576,6 +2643,15 @@ assert(pindex); UpdateBlockAvailability(pfrom->GetId(), pindex->GetBlockHash()); + CNodeState *nodestate = State(pfrom->GetId()); + + // If this was a new header with more work than our tip, update the + // peer's last block announcement time + if (received_new_header && + pindex->nChainWork > chainActive.Tip()->nChainWork) { + nodestate->m_last_block_announcement = GetTime(); + } + std::map::iterator>>:: iterator blockInFlightIt = @@ -2610,8 +2686,6 @@ return true; } - CNodeState *nodestate = State(pfrom->GetId()); - // We want to be a bit conservative just to be extra careful about // DoS possibilities in compact block processing... if (pindex->nHeight <= chainActive.Height() + 2) { @@ -3375,6 +3449,106 @@ } } +void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds) { + // Check whether we have too many outbound peers + int extra_peers = connman->GetExtraOutboundCount(); + if (extra_peers > 0) { + // If we have more outbound peers than we target, disconnect one. + // Pick the outbound peer that least recently announced us a new block, + // with ties broken by choosing the more recent connection (higher node + // id) + NodeId worst_peer = -1; + int64_t oldest_block_announcement = std::numeric_limits::max(); + + LOCK(cs_main); + + connman->ForEachNode([&](CNode *pnode) { + // Ignore non-outbound peers, or nodes marked for disconnect already + if (!IsOutboundDisconnectionCandidate(pnode) || + pnode->fDisconnect) { + return; + } + CNodeState *state = State(pnode->GetId()); + if (state == nullptr) { + // shouldn't be possible, but just in case + return; + } + // Don't evict our protected peers + if (state->m_chain_sync.m_protect) { + return; + } + if (state->m_last_block_announcement < oldest_block_announcement || + (state->m_last_block_announcement == + oldest_block_announcement && + pnode->GetId() > worst_peer)) { + worst_peer = pnode->GetId(); + oldest_block_announcement = state->m_last_block_announcement; + } + }); + if (worst_peer != -1) { + bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) { + // Only disconnect a peer that has been connected to us for some + // reasonable fraction of our check-frequency, to give it time + // for new information to have arrived. + // Also don't disconnect any peer we're trying to download a + // block from. + CNodeState &state = *State(pnode->GetId()); + if (time_in_seconds - pnode->nTimeConnected > + MINIMUM_CONNECT_TIME && + state.nBlocksInFlight == 0) { + LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d " + "(last block announcement received at " + "time %d)\n", + pnode->GetId(), oldest_block_announcement); + pnode->fDisconnect = true; + return true; + } else { + LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for " + "eviction (connect time: %d, " + "blocks_in_flight: %d)\n", + pnode->GetId(), pnode->nTimeConnected, + state.nBlocksInFlight); + return false; + } + }); + if (disconnected) { + // If we disconnected an extra peer, that means we successfully + // connected to at least one peer after the last time we + // detected a stale tip. Don't try any more extra peers until we + // next detect a stale tip, to limit the load we put on the + // network from these extra connections. + connman->SetTryNewOutboundPeer(false); + } + } + } +} + +void PeerLogicValidation::CheckForStaleTipAndEvictPeers( + const Consensus::Params &consensusParams) { + if (connman == nullptr) { + return; + } + + int64_t time_in_seconds = GetTime(); + + EvictExtraOutboundPeers(time_in_seconds); + + if (time_in_seconds > m_stale_tip_check_time) { + LOCK(cs_main); + // Check whether our tip is stale, and if so, allow using an extra + // outbound peer. + if (TipMayBeStale(consensusParams)) { + LogPrintf("Potential stale tip detected, will try using extra " + "outbound peer (last tip update: %d seconds ago)\n", + time_in_seconds - g_last_tip_update); + connman->SetTryNewOutboundPeer(true); + } else if (connman->GetTryNewOutboundPeer()) { + connman->SetTryNewOutboundPeer(false); + } + m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL; + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; diff --git a/src/test/DoS_tests.cpp b/src/test/DoS_tests.cpp --- a/src/test/DoS_tests.cpp +++ b/src/test/DoS_tests.cpp @@ -41,6 +41,8 @@ static NodeId id = 0; +void UpdateLastBlockAnnounceTime(NodeId node, int64_t time_in_seconds); + BOOST_FIXTURE_TEST_SUITE(DoS_tests, TestingSetup) // Test eviction of an outbound peer whose chain never advances @@ -92,6 +94,92 @@ peerLogic->FinalizeNode(config, dummyNode1.GetId(), dummy); } +void AddRandomOutboundPeer(const Config &config, std::vector &vNodes, + PeerLogicValidation &peerLogic) { + CAddress addr(ip(GetRandInt(0xffffffff)), NODE_NONE); + vNodes.emplace_back(new CNode(id++, ServiceFlags(NODE_NETWORK), 0, + INVALID_SOCKET, addr, 0, 0, CAddress(), "", + /*fInboundIn=*/false)); + CNode &node = *vNodes.back(); + node.SetSendVersion(PROTOCOL_VERSION); + + peerLogic.InitializeNode(config, &node); + node.nVersion = 1; + node.fSuccessfullyConnected = true; + + CConnmanTest::AddNode(node); +} + +BOOST_AUTO_TEST_CASE(stale_tip_peer_management) { + const Config &config = GetConfig(); + const Consensus::Params &consensusParams = + config.GetChainParams().GetConsensus(); + constexpr int nMaxOutbound = 8; + CConnman::Options options; + options.nMaxConnections = 125; + options.nMaxOutbound = nMaxOutbound; + options.nMaxFeeler = 1; + + connman->Init(options); + std::vector vNodes; + + // Mock some outbound peers + for (int i = 0; i < nMaxOutbound; ++i) { + AddRandomOutboundPeer(config, vNodes, *peerLogic); + } + + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + + // No nodes should be marked for disconnection while we have no extra peers + for (const CNode *node : vNodes) { + BOOST_CHECK(node->fDisconnect == false); + } + + SetMockTime(GetTime() + 3 * consensusParams.nPowTargetSpacing + 1); + + // Now tip should definitely be stale, and we should look for an extra + // outbound peer + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + BOOST_CHECK(connman->GetTryNewOutboundPeer()); + + // Still no peers should be marked for disconnection + for (const CNode *node : vNodes) { + BOOST_CHECK(node->fDisconnect == false); + } + + // If we add one more peer, something should get marked for eviction + // on the next check (since we're mocking the time to be in the future, the + // required time connected check should be satisfied). + AddRandomOutboundPeer(config, vNodes, *peerLogic); + + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + for (int i = 0; i < nMaxOutbound; ++i) { + BOOST_CHECK(vNodes[i]->fDisconnect == false); + } + // Last added node should get marked for eviction + BOOST_CHECK(vNodes.back()->fDisconnect == true); + + vNodes.back()->fDisconnect = false; + + // Update the last announced block time for the last + // peer, and check that the next newest node gets evicted. + UpdateLastBlockAnnounceTime(vNodes.back()->GetId(), GetTime()); + + peerLogic->CheckForStaleTipAndEvictPeers(consensusParams); + for (int i = 0; i < nMaxOutbound - 1; ++i) { + BOOST_CHECK(vNodes[i]->fDisconnect == false); + } + BOOST_CHECK(vNodes[nMaxOutbound - 1]->fDisconnect == true); + BOOST_CHECK(vNodes.back()->fDisconnect == false); + + bool dummy; + for (const CNode *node : vNodes) { + peerLogic->FinalizeNode(config, node->GetId(), dummy); + } + + CConnmanTest::ClearNodes(); +} + BOOST_AUTO_TEST_CASE(DoS_banning) { const Config &config = GetConfig(); std::atomic interruptDummy(false); diff --git a/src/test/test_bitcoin.h b/src/test/test_bitcoin.h --- a/src/test/test_bitcoin.h +++ b/src/test/test_bitcoin.h @@ -10,6 +10,7 @@ #include "key.h" #include "pubkey.h" #include "random.h" +#include "scheduler.h" #include "txdb.h" #include "txmempool.h" @@ -61,12 +62,19 @@ * Included are data directory, coins database, script check threads setup. */ class CConnman; +class CNode; +struct CConnmanTest { + static void AddNode(CNode &node); + static void ClearNodes(); +}; + class PeerLogicValidation; struct TestingSetup : public BasicTestingSetup { CCoinsViewDB *pcoinsdbview; fs::path pathTemp; boost::thread_group threadGroup; CConnman *connman; + CScheduler scheduler; std::unique_ptr peerLogic; TestingSetup(const std::string &chainName = CBaseChainParams::MAIN); diff --git a/src/test/test_bitcoin.cpp b/src/test/test_bitcoin.cpp --- a/src/test/test_bitcoin.cpp +++ b/src/test/test_bitcoin.cpp @@ -36,6 +36,16 @@ #include #include +void CConnmanTest::AddNode(CNode &node) { + LOCK(g_connman->cs_vNodes); + g_connman->vNodes.push_back(&node); +} + +void CConnmanTest::ClearNodes() { + LOCK(g_connman->cs_vNodes); + g_connman->vNodes.clear(); +} + uint256 insecure_rand_seed = GetRandHash(); FastRandomContext insecure_rand_ctx(insecure_rand_seed); @@ -100,7 +110,7 @@ // Deterministic randomness for tests. g_connman = std::unique_ptr(new CConnman(config, 0x1337, 0x1337)); connman = g_connman.get(); - peerLogic.reset(new PeerLogicValidation(connman)); + peerLogic.reset(new PeerLogicValidation(connman, scheduler)); } TestingSetup::~TestingSetup() {