diff --git a/src/serialize_intcode.h b/src/serialize_intcode.h new file mode 100644 --- /dev/null +++ b/src/serialize_intcode.h @@ -0,0 +1,116 @@ +// Copyright (c) 2024 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_SERIALIZE_INTCODE_H +#define BITCOIN_SERIALIZE_INTCODE_H + +#include +#include +#include + +#include +#include + +const uint64_t VALID_RANGE[] = { + 0, + 0x80, + 0x4000, + 0x20'0000, + 0x1000'0000, + 0x08'0000'0000, + 0x0400'0000'0000, + 0x02'0000'0000'0000, + 0x0100'0000'0000'0000, +}; + +/** + * Write a 64-bit integer in intcode encoding: + * + * Similar to UTF-8, except data bytes don't have a 10xx'xxxx prefix + * + * Specification & rationale can be found here: + * https://ecashbuilders.notion.site/eCash-Mitra-Integer-Encoding-Spec-2ff596494dcf4d12b75c763464df39de?pvs=4 + * + * Examples: + * 0x0000'0000'0000'007f (7) -> "7f" + * 0x0000'0000'0000'3fff (14) -> "bfff" + * 0x0000'0000'001f'ffff (21) -> "dfffff" + * 0x0000'0000'0fff'ffff (28) -> "efffffff" + * 0x0000'0007'ffff'ffff (35) -> "f7ffffffff" + * 0x0000'03ff'ffff'ffff (42) -> "fbffffffffff" + * 0x0001'ffff'ffff'ffff (49) -> "fdffffffffffff" + * 0x00ff'ffff'ffff'ffff (56) -> "feffffffffffffff" + * 0xffff'ffff'ffff'ffff (63) -> "ffffffffffffffffff" + */ +template void WriteIntcode(Stream &os, uint64_t value) { + // Integers below 0x80 are just represented by themselves + if (value < 0x80) { + ser_writedata8(os, value); + return; + } + + // Number of bits required to represent `value` + const uint64_t numBits = CountBits(value); + // Number of bytes required to represent `value` + uint64_t numBytes = (numBits - 1) / 7; + if (numBytes >= 8) { + // For 0xff headers, the formula breaks, as it's not followed by a 0 bit + ser_writedata8(os, 0xff); + numBytes = 8; + } else { + // Bits to represent how many bytes are used for the data + const uint64_t header = (0xff << (8 - numBytes)) & 0xff; + // Prepend the header + value |= header << (8 * numBytes); + // Header adds 1 leading byte + numBytes++; + // Left align + value <<= 8 * (8 - numBytes); + } + + // Write remaining bytes as big-endian + for (size_t i = 0; i < numBytes; ++i) { + ser_writedata8(os, value >> 56); + value <<= 8; + } +} + +/** + * Read a 64-bit integer as intcode, see WriteIntcode. + * Specification & rationale can be found here: + * https://ecashbuilders.notion.site/eCash-Mitra-Integer-Encoding-Spec-2ff596494dcf4d12b75c763464df39de?pvs=4 + */ +template uint64_t ReadIntcode(Stream &is) { + // Read the header byte + const uint8_t header = ser_readdata8(is); + + // If the first bit is not set, the number is the first byte itself + if (header < 0x80) { + return header; + } + + // Number of leading ones in header (represents the number of extra bytes) + const uint64_t leadingOnes = 8 - CountBits(uint8_t(~header)); + + // Read the data bits from the header + const uint8_t mask = 0xff >> leadingOnes; + uint64_t result = header & mask; + + // Read remaining bytes as big-endian + for (size_t i = 0; i < leadingOnes; ++i) { + result <<= 8; + result |= ser_readdata8(is); + } + + if (result < VALID_RANGE[leadingOnes]) { + throw std::ios_base::failure(strprintf( + "non-canonical ReadMitraInt(): 0x%016x out of range for %d " + "leading ones", + result, leadingOnes)); + } + + return result; +} + +#endif // BITCOIN_SERIALIZE_INTCODE_H diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -33,6 +33,8 @@ data/blockfilters.json data/key_io_valid.json data/key_io_invalid.json + data/intcode_valid.json + data/intcode_invalid.json data/script_tests.json data/sighash.json data/tx_invalid.json @@ -214,6 +216,7 @@ script_tests.cpp scriptnum_tests.cpp serialize_tests.cpp + serialize_intcode_tests.cpp settings_tests.cpp shortidprocessor_tests.cpp sigcache_tests.cpp diff --git a/src/test/data/intcode_invalid.json b/src/test/data/intcode_invalid.json new file mode 100644 --- /dev/null +++ b/src/test/data/intcode_invalid.json @@ -0,0 +1,55 @@ +[ + "", + "80", + "8000", + "807f", + "c0", + "c000", + "c00000", + "c03fff", + "e0", + "e000", + "e00000", + "e0000000", + "e01fffff", + "f0", + "f000", + "f00000", + "f0000000", + "f000000000", + "f00fffffff", + "f8", + "f800", + "f80000", + "f8000000", + "f800000000", + "f80000000000", + "f807ffffffff", + "fc", + "fc00", + "fc0000", + "fc000000", + "fc00000000", + "fc0000000000", + "fc000000000000", + "fc03ffffffffff", + "fe", + "fe00", + "fe0000", + "fe000000", + "fe00000000", + "fe0000000000", + "fe000000000000", + "fe00000000000000", + "fe01ffffffffffff", + "ff", + "ff00", + "ff0000", + "ff000000", + "ff00000000", + "ff0000000000", + "ff000000000000", + "ff00000000000000", + "ff0000000000000000", + "ff00ffffffffffffff" +] diff --git a/src/test/data/intcode_valid.json b/src/test/data/intcode_valid.json new file mode 100644 --- /dev/null +++ b/src/test/data/intcode_valid.json @@ -0,0 +1,144 @@ +[ + ["0x00", "00"], + ["0x20", "20"], + ["0x7f", "7f"], + ["0xfe", "80fe"], + ["0x3fff", "bfff"], + ["0x1fffff", "dfffff"], + ["0x0fffffff", "efffffff"], + ["0x07ffffffff", "f7ffffffff"], + ["0x03ffffffffff", "fbffffffffff"], + ["0x01ffffffffffff", "fdffffffffffff"], + ["0x00ffffffffffffff", "feffffffffffffff"], + ["0x00ffffffffffffffff", "ffffffffffffffffff"], + ["0xde", "80de"], + ["0x0dea", "8dea"], + ["0xdead", "c0dead"], + ["0x0deadb", "cdeadb"], + ["0xdeadbe", "e0deadbe"], + ["0x0deadbee", "edeadbee"], + ["0xdeadbeef", "f0deadbeef"], + ["0x0deadbeefb", "f80deadbeefb"], + ["0xdeadbeefba", "f8deadbeefba"], + ["0x0deadbeefbad", "fc0deadbeefbad"], + ["0xdeadbeefbadc", "fcdeadbeefbadc"], + ["0x0deadbeefbadc0", "fe0deadbeefbadc0"], + ["0xdeadbeefbadc0f", "fedeadbeefbadc0f"], + ["0x0deadbeefbadc0ff", "ff0deadbeefbadc0ff"], + ["0xdeadbeefbadc0ffe", "ffdeadbeefbadc0ffe"], + ["0x000000000000007f", "7f"], + ["0x0000000000000080", "8080"], + ["0x00000000000000ff", "80ff"], + ["0x0000000000000100", "8100"], + ["0x00000000000001ff", "81ff"], + ["0x0000000000000200", "8200"], + ["0x00000000000003ff", "83ff"], + ["0x0000000000000400", "8400"], + ["0x00000000000007ff", "87ff"], + ["0x0000000000000800", "8800"], + ["0x0000000000000fff", "8fff"], + ["0x0000000000001000", "9000"], + ["0x0000000000001fff", "9fff"], + ["0x0000000000002000", "a000"], + ["0x0000000000003fff", "bfff"], + ["0x0000000000004000", "c04000"], + ["0x0000000000007fff", "c07fff"], + ["0x0000000000008000", "c08000"], + ["0x000000000000ffff", "c0ffff"], + ["0x0000000000010000", "c10000"], + ["0x000000000001ffff", "c1ffff"], + ["0x0000000000020000", "c20000"], + ["0x000000000003ffff", "c3ffff"], + ["0x0000000000040000", "c40000"], + ["0x000000000007ffff", "c7ffff"], + ["0x0000000000080000", "c80000"], + ["0x00000000000fffff", "cfffff"], + ["0x0000000000100000", "d00000"], + ["0x00000000001fffff", "dfffff"], + ["0x0000000000200000", "e0200000"], + ["0x00000000003fffff", "e03fffff"], + ["0x0000000000400000", "e0400000"], + ["0x00000000007fffff", "e07fffff"], + ["0x0000000000800000", "e0800000"], + ["0x0000000000ffffff", "e0ffffff"], + ["0x0000000001000000", "e1000000"], + ["0x0000000001ffffff", "e1ffffff"], + ["0x0000000002000000", "e2000000"], + ["0x0000000003ffffff", "e3ffffff"], + ["0x0000000004000000", "e4000000"], + ["0x0000000007ffffff", "e7ffffff"], + ["0x0000000008000000", "e8000000"], + ["0x000000000fffffff", "efffffff"], + ["0x0000000010000000", "f010000000"], + ["0x000000001fffffff", "f01fffffff"], + ["0x0000000020000000", "f020000000"], + ["0x000000003fffffff", "f03fffffff"], + ["0x0000000040000000", "f040000000"], + ["0x000000007fffffff", "f07fffffff"], + ["0x0000000080000000", "f080000000"], + ["0x00000000ffffffff", "f0ffffffff"], + ["0x0000000100000000", "f100000000"], + ["0x00000001ffffffff", "f1ffffffff"], + ["0x0000000200000000", "f200000000"], + ["0x00000003ffffffff", "f3ffffffff"], + ["0x0000000400000000", "f400000000"], + ["0x00000007ffffffff", "f7ffffffff"], + ["0x0000000800000000", "f80800000000"], + ["0x0000000fffffffff", "f80fffffffff"], + ["0x0000001000000000", "f81000000000"], + ["0x0000001fffffffff", "f81fffffffff"], + ["0x0000002000000000", "f82000000000"], + ["0x0000003fffffffff", "f83fffffffff"], + ["0x0000004000000000", "f84000000000"], + ["0x0000007fffffffff", "f87fffffffff"], + ["0x0000008000000000", "f88000000000"], + ["0x000000ffffffffff", "f8ffffffffff"], + ["0x0000010000000000", "f90000000000"], + ["0x000001ffffffffff", "f9ffffffffff"], + ["0x0000020000000000", "fa0000000000"], + ["0x000003ffffffffff", "fbffffffffff"], + ["0x0000040000000000", "fc040000000000"], + ["0x000007ffffffffff", "fc07ffffffffff"], + ["0x0000080000000000", "fc080000000000"], + ["0x00000fffffffffff", "fc0fffffffffff"], + ["0x0000100000000000", "fc100000000000"], + ["0x00001fffffffffff", "fc1fffffffffff"], + ["0x0000200000000000", "fc200000000000"], + ["0x00003fffffffffff", "fc3fffffffffff"], + ["0x0000400000000000", "fc400000000000"], + ["0x00007fffffffffff", "fc7fffffffffff"], + ["0x0000800000000000", "fc800000000000"], + ["0x0000ffffffffffff", "fcffffffffffff"], + ["0x0001000000000000", "fd000000000000"], + ["0x0001ffffffffffff", "fdffffffffffff"], + ["0x0002000000000000", "fe02000000000000"], + ["0x0003ffffffffffff", "fe03ffffffffffff"], + ["0x0004000000000000", "fe04000000000000"], + ["0x0007ffffffffffff", "fe07ffffffffffff"], + ["0x0008000000000000", "fe08000000000000"], + ["0x000fffffffffffff", "fe0fffffffffffff"], + ["0x0010000000000000", "fe10000000000000"], + ["0x001fffffffffffff", "fe1fffffffffffff"], + ["0x0020000000000000", "fe20000000000000"], + ["0x003fffffffffffff", "fe3fffffffffffff"], + ["0x0040000000000000", "fe40000000000000"], + ["0x007fffffffffffff", "fe7fffffffffffff"], + ["0x0080000000000000", "fe80000000000000"], + ["0x00ffffffffffffff", "feffffffffffffff"], + ["0x0100000000000000", "ff0100000000000000"], + ["0x01ffffffffffffff", "ff01ffffffffffffff"], + ["0x0200000000000000", "ff0200000000000000"], + ["0x03ffffffffffffff", "ff03ffffffffffffff"], + ["0x0400000000000000", "ff0400000000000000"], + ["0x07ffffffffffffff", "ff07ffffffffffffff"], + ["0x0800000000000000", "ff0800000000000000"], + ["0x0fffffffffffffff", "ff0fffffffffffffff"], + ["0x1000000000000000", "ff1000000000000000"], + ["0x1fffffffffffffff", "ff1fffffffffffffff"], + ["0x2000000000000000", "ff2000000000000000"], + ["0x3fffffffffffffff", "ff3fffffffffffffff"], + ["0x4000000000000000", "ff4000000000000000"], + ["0x7fffffffffffffff", "ff7fffffffffffffff"], + ["0x8000000000000000", "ff8000000000000000"], + ["0xffffffffffffffff", "ffffffffffffffffff"] +] diff --git a/src/test/serialize_intcode_tests.cpp b/src/test/serialize_intcode_tests.cpp new file mode 100644 --- /dev/null +++ b/src/test/serialize_intcode_tests.cpp @@ -0,0 +1,94 @@ +// Copyright (c) 2024 The Bitcoin developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +BOOST_FIXTURE_TEST_SUITE(serialize_intcode_tests, BasicTestingSetup) + +BOOST_AUTO_TEST_CASE(example_intcode_valid_json) { + UniValue tests = read_json(std::string( + json_tests::intcode_valid, + json_tests::intcode_valid + sizeof(json_tests::intcode_valid))); + + for (size_t idx = 0; idx < tests.size(); idx++) { + UniValue test = tests[idx]; + std::string strTest = test.write(); + if (!test.isArray()) { + BOOST_ERROR("Bad test, expected array: " << strTest); + continue; + } + if (test.size() != 2 || !test[0].isStr() || !test[1].isStr()) { + BOOST_ERROR("Bad test, expected two strings: " << strTest); + continue; + } + const std::string &integerStr = test[0].get_str(); + const std::string &encodedHex = test[1].get_str(); + + uint64_t num = 0; + if (integerStr.substr(0, 2) == "0x" && integerStr.size() > 2) { + // Hex numbers + if (!IsHex(std::string(integerStr.begin() + 2, integerStr.end()))) { + BOOST_ERROR("Bad test, invalid hex: '" << integerStr << "'."); + } + std::vector raw = + ParseHex(std::string(integerStr.begin() + 2, integerStr.end())); + for (uint8_t byte : raw) { + num = (num << 8) | byte; + } + } else { + BOOST_ERROR( + "Bad test, expected hex of the form 0x...: " << integerStr); + } + + const std::vector encoded = ParseHex(encodedHex); + + CDataStream ss(SER_DISK, 0); + WriteIntcode(ss, num); + BOOST_CHECK_EQUAL(HexStr(ss), encodedHex); + + ss.clear(); + ss.write(MakeByteSpan(encoded)); + BOOST_CHECK_EQUAL(ReadIntcode(ss), num); + } +} + +BOOST_AUTO_TEST_CASE(example_intcode_invalid_json) { + UniValue tests = read_json(std::string( + json_tests::intcode_invalid, + json_tests::intcode_invalid + sizeof(json_tests::intcode_invalid))); + + for (size_t idx = 0; idx < tests.size(); idx++) { + const std::string &hex = tests[idx].get_str(); + const std::vector invalid = ParseHex(hex); + CDataStream ss(invalid, SER_DISK, 0); + BOOST_CHECK_THROW(ReadIntcode(ss), std::ios_base::failure); + } +} + +BOOST_AUTO_TEST_CASE(rng_roundtrip) { + MMIXLinearCongruentialGenerator lcg; + CDataStream ss(SER_DISK, 0); + + for (int i = 0; i < 1000000; ++i) { + uint64_t bits = (uint64_t(lcg.next()) << 32) | lcg.next(); + bits >>= lcg.next() % 64; + + WriteIntcode(ss, bits); + BOOST_CHECK_EQUAL(ReadIntcode(ss), bits); + ss.clear(); + } +} + +BOOST_AUTO_TEST_SUITE_END()