Changeset View
Changeset View
Standalone View
Standalone View
test/functional/feature_dbcrash.py
Show First 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | def restart_node(self, node_index, expected_tip): | ||||
time.sleep(1) | time.sleep(1) | ||||
# If we got here, bitcoind isn't coming back up on restart. Could be a | # If we got here, bitcoind isn't coming back up on restart. Could be a | ||||
# bug in bitcoind, or we've gotten unlucky with our dbcrash ratio -- | # bug in bitcoind, or we've gotten unlucky with our dbcrash ratio -- | ||||
# perhaps we generated a test case that blew up our cache? | # perhaps we generated a test case that blew up our cache? | ||||
# TODO: If this happens a lot, we should try to restart without -dbcrashratio | # TODO: If this happens a lot, we should try to restart without -dbcrashratio | ||||
# and make sure that recovery happens. | # and make sure that recovery happens. | ||||
raise AssertionError( | raise AssertionError( | ||||
"Unable to successfully restart node %d in allotted time", node_index) | "Unable to successfully restart node {} in allotted time".format(node_index)) | ||||
def submit_block_catch_error(self, node_index, block): | def submit_block_catch_error(self, node_index, block): | ||||
"""Try submitting a block to the given node. | """Try submitting a block to the given node. | ||||
Catch any exceptions that indicate the node has crashed. | Catch any exceptions that indicate the node has crashed. | ||||
Returns true if the block was submitted successfully; false otherwise.""" | Returns true if the block was submitted successfully; false otherwise.""" | ||||
try: | try: | ||||
self.nodes[node_index].submitblock(block) | self.nodes[node_index].submitblock(block) | ||||
return True | return True | ||||
except http.client.BadStatusLine as e: | except http.client.BadStatusLine as e: | ||||
# Prior to 3.5 BadStatusLine('') was raised for a remote disconnect error. | # Prior to 3.5 BadStatusLine('') was raised for a remote disconnect error. | ||||
if sys.version_info[0] == 3 and sys.version_info[1] < 5 and e.line == "''": | if sys.version_info[0] == 3 and sys.version_info[1] < 5 and e.line == "''": | ||||
self.log.debug( | self.log.debug( | ||||
"node %d submitblock raised exception: %s", node_index, e) | "node {} submitblock raised exception: {}".format(node_index, e)) | ||||
return False | return False | ||||
else: | else: | ||||
raise | raise | ||||
except tuple(HTTP_DISCONNECT_ERRORS) as e: | except tuple(HTTP_DISCONNECT_ERRORS) as e: | ||||
self.log.debug( | self.log.debug( | ||||
"node %d submitblock raised exception: %s", node_index, e) | "node {} submitblock raised exception: {}".format(node_index, e)) | ||||
return False | return False | ||||
except OSError as e: | except OSError as e: | ||||
self.log.debug( | self.log.debug( | ||||
"node %d submitblock raised OSError exception: errno=%s", node_index, e.errno) | "node {} submitblock raised OSError exception: errno={}".format(node_index, e.errno)) | ||||
if e.errno in [errno.EPIPE, errno.ECONNREFUSED, errno.ECONNRESET]: | if e.errno in [errno.EPIPE, errno.ECONNREFUSED, errno.ECONNRESET]: | ||||
# The node has likely crashed | # The node has likely crashed | ||||
return False | return False | ||||
else: | else: | ||||
# Unexpected exception, raise | # Unexpected exception, raise | ||||
raise | raise | ||||
def sync_node3blocks(self, block_hashes): | def sync_node3blocks(self, block_hashes): | ||||
Show All 9 Lines | def sync_node3blocks(self, block_hashes): | ||||
blocks = [] | blocks = [] | ||||
for block_hash in block_hashes: | for block_hash in block_hashes: | ||||
blocks.append( | blocks.append( | ||||
[block_hash, self.nodes[3].getblock(block_hash, False)]) | [block_hash, self.nodes[3].getblock(block_hash, False)]) | ||||
# Deliver each block to each other node | # Deliver each block to each other node | ||||
for i in range(3): | for i in range(3): | ||||
nodei_utxo_hash = None | nodei_utxo_hash = None | ||||
self.log.debug("Syncing blocks to node %d", i) | self.log.debug("Syncing blocks to node {}".format(i)) | ||||
for (block_hash, block) in blocks: | for (block_hash, block) in blocks: | ||||
# Get the block from node3, and submit to node_i | # Get the block from node3, and submit to node_i | ||||
self.log.debug("submitting block %s", block_hash) | self.log.debug("submitting block {}".format(block_hash)) | ||||
if not self.submit_block_catch_error(i, block): | if not self.submit_block_catch_error(i, block): | ||||
# TODO: more carefully check that the crash is due to -dbcrashratio | # TODO: more carefully check that the crash is due to -dbcrashratio | ||||
# (change the exit code perhaps, and check that here?) | # (change the exit code perhaps, and check that here?) | ||||
self.wait_for_node_exit(i, timeout=30) | self.wait_for_node_exit(i, timeout=30) | ||||
self.log.debug( | self.log.debug( | ||||
"Restarting node %d after block hash %s", i, block_hash) | "Restarting node %d after block hash {}".format(i, block_hash)) | ||||
nodei_utxo_hash = self.restart_node(i, block_hash) | nodei_utxo_hash = self.restart_node(i, block_hash) | ||||
assert nodei_utxo_hash is not None | assert nodei_utxo_hash is not None | ||||
self.restart_counts[i] += 1 | self.restart_counts[i] += 1 | ||||
else: | else: | ||||
# Clear it out after successful submitblock calls -- the cached | # Clear it out after successful submitblock calls -- the cached | ||||
# utxo hash will no longer be correct | # utxo hash will no longer be correct | ||||
nodei_utxo_hash = None | nodei_utxo_hash = None | ||||
# Check that the utxo hash matches node3's utxo set | # Check that the utxo hash matches node3's utxo set | ||||
# NOTE: we only check the utxo set if we had to restart the node | # NOTE: we only check the utxo set if we had to restart the node | ||||
# after the last block submitted: | # after the last block submitted: | ||||
# - checking the utxo hash causes a cache flush, which we don't | # - checking the utxo hash causes a cache flush, which we don't | ||||
# want to do every time; so | # want to do every time; so | ||||
# - we only update the utxo cache after a node restart, since flushing | # - we only update the utxo cache after a node restart, since flushing | ||||
# the cache is a no-op at that point | # the cache is a no-op at that point | ||||
if nodei_utxo_hash is not None: | if nodei_utxo_hash is not None: | ||||
self.log.debug("Checking txoutsetinfo matches for node %d", i) | self.log.debug( | ||||
"Checking txoutsetinfo matches for node {}".format(i)) | |||||
assert_equal(nodei_utxo_hash, node3_utxo_hash) | assert_equal(nodei_utxo_hash, node3_utxo_hash) | ||||
def verify_utxo_hash(self): | def verify_utxo_hash(self): | ||||
"""Verify that the utxo hash of each node matches node3. | """Verify that the utxo hash of each node matches node3. | ||||
Restart any nodes that crash while querying.""" | Restart any nodes that crash while querying.""" | ||||
node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized'] | node3_utxo_hash = self.nodes[3].gettxoutsetinfo()['hash_serialized'] | ||||
self.log.info("Verifying utxo hash matches for all nodes") | self.log.info("Verifying utxo hash matches for all nodes") | ||||
Show All 38 Lines | class ChainstateWriteCrashTest(BitcoinTestFramework): | ||||
def run_test(self): | def run_test(self): | ||||
# Track test coverage statistics | # Track test coverage statistics | ||||
self.restart_counts = [0, 0, 0] # Track the restarts for nodes 0-2 | self.restart_counts = [0, 0, 0] # Track the restarts for nodes 0-2 | ||||
self.crashed_on_restart = 0 # Track count of crashes during recovery | self.crashed_on_restart = 0 # Track count of crashes during recovery | ||||
# Start by creating a lot of utxos on node3 | # Start by creating a lot of utxos on node3 | ||||
initial_height = self.nodes[3].getblockcount() | initial_height = self.nodes[3].getblockcount() | ||||
utxo_list = create_confirmed_utxos(self.nodes[3], 5000) | utxo_list = create_confirmed_utxos(self.nodes[3], 5000) | ||||
self.log.info("Prepped %d utxo entries", len(utxo_list)) | self.log.info("Prepped {} utxo entries".format(len(utxo_list))) | ||||
# Sync these blocks with the other nodes | # Sync these blocks with the other nodes | ||||
block_hashes_to_sync = [] | block_hashes_to_sync = [] | ||||
for height in range(initial_height + 1, self.nodes[3].getblockcount() + 1): | for height in range(initial_height + 1, self.nodes[3].getblockcount() + 1): | ||||
block_hashes_to_sync.append(self.nodes[3].getblockhash(height)) | block_hashes_to_sync.append(self.nodes[3].getblockhash(height)) | ||||
self.log.debug("Syncing %d blocks with other nodes", | self.log.debug("Syncing {} blocks with other nodes".format( | ||||
len(block_hashes_to_sync)) | len(block_hashes_to_sync))) | ||||
# Syncing the blocks could cause nodes to crash, so the test begins here. | # Syncing the blocks could cause nodes to crash, so the test begins here. | ||||
self.sync_node3blocks(block_hashes_to_sync) | self.sync_node3blocks(block_hashes_to_sync) | ||||
starting_tip_height = self.nodes[3].getblockcount() | starting_tip_height = self.nodes[3].getblockcount() | ||||
# Main test loop: | # Main test loop: | ||||
# each time through the loop, generate a bunch of transactions, | # each time through the loop, generate a bunch of transactions, | ||||
# and then either mine a single new block on the tip, or some-sized reorg. | # and then either mine a single new block on the tip, or some-sized reorg. | ||||
for i in range(40): | for i in range(40): | ||||
self.log.info( | self.log.info( | ||||
"Iteration %d, generating 2500 transactions %s", i, self.restart_counts) | "Iteration {}, generating 2500 transactions {}".format( | ||||
i, self.restart_counts)) | |||||
# Generate a bunch of small-ish transactions | # Generate a bunch of small-ish transactions | ||||
self.generate_small_transactions(self.nodes[3], 2500, utxo_list) | self.generate_small_transactions(self.nodes[3], 2500, utxo_list) | ||||
# Pick a random block between current tip, and starting tip | # Pick a random block between current tip, and starting tip | ||||
current_height = self.nodes[3].getblockcount() | current_height = self.nodes[3].getblockcount() | ||||
random_height = random.randint(starting_tip_height, current_height) | random_height = random.randint(starting_tip_height, current_height) | ||||
self.log.debug("At height %d, considering height %d", | self.log.debug("At height {}, considering height {}".format( | ||||
current_height, random_height) | current_height, random_height)) | ||||
if random_height > starting_tip_height: | if random_height > starting_tip_height: | ||||
# Randomly reorg from this point with some probability (1/4 for | # Randomly reorg from this point with some probability (1/4 for | ||||
# tip, 1/5 for tip-1, ...) | # tip, 1/5 for tip-1, ...) | ||||
if random.random() < 1.0 / (current_height + 4 - random_height): | if random.random() < 1.0 / (current_height + 4 - random_height): | ||||
self.log.debug( | self.log.debug( | ||||
"Invalidating block at height %d", random_height) | "Invalidating block at height {}".format(random_height)) | ||||
self.nodes[3].invalidateblock( | self.nodes[3].invalidateblock( | ||||
self.nodes[3].getblockhash(random_height)) | self.nodes[3].getblockhash(random_height)) | ||||
# Now generate new blocks until we pass the old tip height | # Now generate new blocks until we pass the old tip height | ||||
self.log.debug("Mining longer tip") | self.log.debug("Mining longer tip") | ||||
block_hashes = [] | block_hashes = [] | ||||
while current_height + 1 > self.nodes[3].getblockcount(): | while current_height + 1 > self.nodes[3].getblockcount(): | ||||
block_hashes.extend(self.nodes[3].generate( | block_hashes.extend(self.nodes[3].generate( | ||||
min(10, current_height + 1 - self.nodes[3].getblockcount()))) | min(10, current_height + 1 - self.nodes[3].getblockcount()))) | ||||
self.log.debug("Syncing %d new blocks...", len(block_hashes)) | self.log.debug( | ||||
"Syncing {} new blocks...".format(len(block_hashes))) | |||||
self.sync_node3blocks(block_hashes) | self.sync_node3blocks(block_hashes) | ||||
utxo_list = self.nodes[3].listunspent() | utxo_list = self.nodes[3].listunspent() | ||||
self.log.debug("Node3 utxo count: %d", len(utxo_list)) | self.log.debug("Node3 utxo count: {}".format(len(utxo_list))) | ||||
# Check that the utxo hashes agree with node3 | # Check that the utxo hashes agree with node3 | ||||
# Useful side effect: each utxo cache gets flushed here, so that we | # Useful side effect: each utxo cache gets flushed here, so that we | ||||
# won't get crashes on shutdown at the end of the test. | # won't get crashes on shutdown at the end of the test. | ||||
self.verify_utxo_hash() | self.verify_utxo_hash() | ||||
# Check the test coverage | # Check the test coverage | ||||
self.log.info("Restarted nodes: %s; crashes on restart: %d", | self.log.info("Restarted nodes: {}; crashes on restart: {}".format( | ||||
self.restart_counts, self.crashed_on_restart) | self.restart_counts, self.crashed_on_restart)) | ||||
# If no nodes were restarted, we didn't test anything. | # If no nodes were restarted, we didn't test anything. | ||||
assert self.restart_counts != [0, 0, 0] | assert self.restart_counts != [0, 0, 0] | ||||
# Make sure we tested the case of crash-during-recovery. | # Make sure we tested the case of crash-during-recovery. | ||||
assert self.crashed_on_restart > 0 | assert self.crashed_on_restart > 0 | ||||
# Warn if any of the nodes escaped restart. | # Warn if any of the nodes escaped restart. | ||||
for i in range(3): | for i in range(3): | ||||
if self.restart_counts[i] == 0: | if self.restart_counts[i] == 0: | ||||
self.log.warn("Node %d never crashed during utxo flush!", i) | self.log.warn( | ||||
"Node {} never crashed during utxo flush!".format(i)) | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
ChainstateWriteCrashTest().main() | ChainstateWriteCrashTest().main() |