Page MenuHomePhabricator

D7080.diff
No OneTemporary

D7080.diff

diff --git a/doc/developer-notes.md b/doc/developer-notes.md
--- a/doc/developer-notes.md
+++ b/doc/developer-notes.md
@@ -17,6 +17,7 @@
- [DEBUG_LOCKORDER](#debug_lockorder)
- [Valgrind suppressions file](#valgrind-suppressions-file)
- [Compiling for test coverage](#compiling-for-test-coverage)
+ - [Performance profiling with perf](#performance-profiling-with-perf)
- [Sanitizers](#sanitizers)
- [Locking/mutex usage notes](#lockingmutex-usage-notes)
- [Threads](#threads)
@@ -302,6 +303,49 @@
To include branch coverage, you can add the `-DENABLE_BRANCH_COVERAGE=ON` option
to the `cmake` command line.
+### Performance profiling with perf
+
+Profiling is a good way to get a precise idea of where time is being spent in
+code. One tool for doing profiling on Linux platforms is called
+[`perf`](http://www.brendangregg.com/perf.html), and has been integrated into
+the functional test framework. Perf can observe a running process and sample
+(at some frequency) where its execution is.
+
+Perf installation is contingent on which kernel version you're running; see
+[this StackExchange
+thread](https://askubuntu.com/questions/50145/how-to-install-perf-monitoring-tool)
+for specific instructions.
+
+Certain kernel parameters may need to be set for perf to be able to inspect the
+running process' stack.
+
+```sh
+$ sudo sysctl -w kernel.perf_event_paranoid=-1
+$ sudo sysctl -w kernel.kptr_restrict=0
+```
+
+Make sure you [understand the security
+trade-offs](https://lwn.net/Articles/420403/) of setting these kernel
+parameters.
+
+To profile a running bitcoind process for 60 seconds, you could use an
+invocation of `perf record` like this:
+
+```sh
+$ perf record \
+ -g --call-graph dwarf --per-thread -F 140 \
+ -p `pgrep bitcoind` -- sleep 60
+```
+
+You could then analyze the results by running
+
+```sh
+perf report --stdio | c++filt | less
+```
+
+or using a graphical tool like [Hotspot](https://github.com/KDAB/hotspot).
+
+See the functional test documentation for how to invoke perf within tests.
### Sanitizers
diff --git a/doc/functional-tests.md b/doc/functional-tests.md
--- a/doc/functional-tests.md
+++ b/doc/functional-tests.md
@@ -180,6 +180,39 @@
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
```
+### Benchmarking and profiling with perf
+
+An easy way to profile node performance during functional tests is provided
+for Linux platforms using `perf`.
+
+Perf will sample the running node and will generate profile data in the node's
+datadir. The profile data can then be presented using `perf report` or a graphical
+tool like [hotspot](https://github.com/KDAB/hotspot).
+
+There are two ways of invoking perf: one is to use the `--perf` flag when
+running tests, which will profile each node during the entire test run: perf
+begins to profile when the node starts and ends when it shuts down. The other
+way is the use the `profile_with_perf` context manager, e.g.
+
+```python
+with node.profile_with_perf("send-big-msgs"):
+ # Perform activity on the node you're interested in profiling, e.g.:
+ for _ in range(10000):
+ node.p2p.send_message(some_large_message)
+```
+
+To see useful textual output, run
+
+```sh
+perf report -i /path/to/datadir/send-big-msgs.perf.data.xxxx --stdio | c++filt | less
+```
+
+#### See also:
+
+- [Installing perf](https://askubuntu.com/q/50145)
+- [Perf examples](http://www.brendangregg.com/perf.html)
+- [Hotspot](https://github.com/KDAB/hotspot): a GUI for perf output analysis
+
##### Prevent using deprecated features
Python will issue a `DeprecationWarning` when a deprecated feature is
diff --git a/test/functional/test_framework/test_framework.py b/test/functional/test_framework/test_framework.py
--- a/test/functional/test_framework/test_framework.py
+++ b/test/functional/test_framework/test_framework.py
@@ -129,8 +129,11 @@
help="Attach a python debugger if test fails")
parser.add_argument("--usecli", dest="usecli", default=False, action="store_true",
help="use bitcoin-cli instead of RPC for all commands")
+ parser.add_argument("--perf", dest="perf", default=False, action="store_true",
+ help="profile running nodes with perf for the duration of the test")
parser.add_argument("--with-axionactivation", dest="axionactivation", default=False, action="store_true",
help="Activate axion update on timestamp {}".format(TIMESTAMP_IN_THE_PAST))
+
self.add_options(parser)
self.options = parser.parse_args()
@@ -212,9 +215,20 @@
self.log.info(
"Note: bitcoinds were not stopped and may still be running")
- if not self.options.nocleanup and not self.options.noshutdown and success != TestStatus.FAILED:
+ should_clean_up = (
+ not self.options.nocleanup and
+ not self.options.noshutdown and
+ success != TestStatus.FAILED and
+ not self.options.perf
+ )
+ if should_clean_up:
self.log.info("Cleaning up {} on exit".format(self.options.tmpdir))
cleanup_tree_on_exit = True
+ elif self.options.perf:
+ self.log.warning(
+ "Not cleaning up dir {} due to perf data".format(
+ self.options.tmpdir))
+ cleanup_tree_on_exit = False
else:
self.log.warning(
"Not cleaning up dir {}".format(self.options.tmpdir))
@@ -353,6 +367,7 @@
extra_args=extra_args[i],
use_cli=self.options.usecli,
emulator=self.options.emulator,
+ start_perf=self.options.perf,
))
if self.options.axionactivation:
self.nodes[i].extend_default_args(
diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py
--- a/test/functional/test_framework/test_node.py
+++ b/test/functional/test_framework/test_node.py
@@ -19,6 +19,7 @@
import time
import urllib.parse
import collections
+import shlex
from .authproxy import JSONRPCException
from .messages import COIN, CTransaction, FromHex
@@ -59,7 +60,13 @@
be dispatched to the RPC connection."""
def __init__(self, i, datadir, *, chain, host, rpc_port, p2p_port, timewait, bitcoind, bitcoin_cli,
- coverage_dir, extra_conf=None, extra_args=None, use_cli=False, emulator=None):
+ coverage_dir, extra_conf=None, extra_args=None, use_cli=False, emulator=None, start_perf=False):
+ """
+ Kwargs:
+ start_perf (bool): If True, begin profiling the node with `perf` as soon as
+ the node starts.
+ """
+
self.index = i
self.datadir = datadir
self.bitcoinconf = os.path.join(self.datadir, "bitcoin.conf")
@@ -106,6 +113,7 @@
"Binary '{}' could not be found.\nTry setting it manually:\n\tBITCOINCLI=<path/to/bitcoin-cli> {}".format(bitcoin_cli, sys.argv[0]))
self.cli = TestNodeCLI(bitcoin_cli, self.datadir, self.emulator)
self.use_cli = use_cli
+ self.start_perf = start_perf
self.running = False
self.process = None
@@ -116,6 +124,8 @@
self.log = logging.getLogger('TestFramework.node{}'.format(i))
# Whether to kill the node when this object goes away
self.cleanup_on_exit = True
+ # Cache perf subprocesses here by their data output filename.
+ self.perf_subprocesses = {}
self.p2ps = []
AddressKeyPair = collections.namedtuple(
@@ -248,6 +258,9 @@
self.running = True
self.log.debug("bitcoind started, waiting for RPC to come up")
+ if self.start_perf:
+ self._start_perf()
+
def wait_for_rpc_connection(self):
"""Sets up an RPC connection to the bitcoind process. Returns False if unable to connect."""
# Poll at a rate of four times per second
@@ -317,6 +330,10 @@
except http.client.CannotSendRequest:
self.log.exception("Unable to stop node.")
+ # If there are any running perf processes, stop them.
+ for profile_name in tuple(self.perf_subprocesses.keys()):
+ self._stop_perf(profile_name)
+
# Check that stderr is as expected
self.stderr.seek(0)
stderr = self.stderr.read().decode('utf-8').strip()
@@ -402,6 +419,91 @@
'Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(
str(expected_msgs), print_log))
+ @contextlib.contextmanager
+ def profile_with_perf(self, profile_name):
+ """
+ Context manager that allows easy profiling of node activity using `perf`.
+
+ See `test/functional/README.md` for details on perf usage.
+
+ Args:
+ profile_name (str): This string will be appended to the
+ profile data filename generated by perf.
+ """
+ subp = self._start_perf(profile_name)
+
+ yield
+
+ if subp:
+ self._stop_perf(profile_name)
+
+ def _start_perf(self, profile_name=None):
+ """Start a perf process to profile this node.
+
+ Returns the subprocess running perf."""
+ subp = None
+
+ def test_success(cmd):
+ return subprocess.call(
+ # shell=True required for pipe use below
+ cmd, shell=True,
+ stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL) == 0
+
+ if not sys.platform.startswith('linux'):
+ self.log.warning(
+ "Can't profile with perf; only availabe on Linux platforms")
+ return None
+
+ if not test_success('which perf'):
+ self.log.warning(
+ "Can't profile with perf; must install perf-tools")
+ return None
+
+ if not test_success(
+ 'readelf -S {} | grep .debug_str'.format(shlex.quote(self.binary))):
+ self.log.warning(
+ "perf output won't be very useful without debug symbols compiled into bitcoind")
+
+ output_path = tempfile.NamedTemporaryFile(
+ dir=self.datadir,
+ prefix="{}.perf.data.".format(profile_name or 'test'),
+ delete=False,
+ ).name
+
+ cmd = [
+ 'perf', 'record',
+ '-g', # Record the callgraph.
+ # Compatibility for gcc's --fomit-frame-pointer.
+ '--call-graph', 'dwarf',
+ '-F', '101', # Sampling frequency in Hz.
+ '-p', str(self.process.pid),
+ '-o', output_path,
+ ]
+ subp = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ self.perf_subprocesses[profile_name] = subp
+
+ return subp
+
+ def _stop_perf(self, profile_name):
+ """Stop (and pop) a perf subprocess."""
+ subp = self.perf_subprocesses.pop(profile_name)
+ output_path = subp.args[subp.args.index('-o') + 1]
+
+ subp.terminate()
+ subp.wait(timeout=10)
+
+ stderr = subp.stderr.read().decode()
+ if 'Consider tweaking /proc/sys/kernel/perf_event_paranoid' in stderr:
+ self.log.warning(
+ "perf couldn't collect data! Try "
+ "'sudo sysctl -w kernel.perf_event_paranoid=-1'")
+ else:
+ report_cmd = "perf report -i {}".format(output_path)
+ self.log.info("See perf output by running '{}'".format(report_cmd))
+
def assert_start_raises_init_error(
self, extra_args=None, expected_msg=None, match=ErrorMatch.FULL_TEXT, *args, **kwargs):
"""Attempt to start the node and expect it to raise an error.

File Metadata

Mime Type
text/plain
Expires
Sat, Mar 1, 11:14 (16 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
5187546
Default Alt Text
D7080.diff (11 KB)

Event Timeline