Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13711278
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
View Options
diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg
index 071345f23a..e0fef13886 100644
--- a/contrib/linearize/example-linearize.cfg
+++ b/contrib/linearize/example-linearize.cfg
@@ -1,17 +1,19 @@
# bitcoind RPC settings (linearize-hashes)
rpcuser=someuser
rpcpassword=somepassword
host=127.0.0.1
port=8332
# bootstrap.dat hashlist settings (linearize-hashes)
max_height=313000
# bootstrap.dat input/output settings (linearize-data)
netmagic=f9beb4d9
input=/home/example/.bitcoin/blocks
output_file=/home/example/Downloads/bootstrap.dat
hashlist=hashlist.txt
split_year=1
+# Maxmimum size in bytes of out-of-order blocks cache in memory
+out_of_order_cache_sz = 100000000
diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py
index 3b5d198c14..2dac3a614b 100755
--- a/contrib/linearize/linearize-data.py
+++ b/contrib/linearize/linearize-data.py
@@ -1,237 +1,299 @@
#!/usr/bin/python
#
# linearize-data.py: Construct a linear, no-fork version of the chain.
#
-# Copyright (c) 2013 The Bitcoin developers
+# Copyright (c) 2013-2014 The Bitcoin developers
# Distributed under the MIT/X11 software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
+from __future__ import print_function, division
import json
import struct
import re
import os
import base64
import httplib
import sys
import hashlib
import datetime
import time
+from collections import namedtuple
settings = {}
-
def uint32(x):
return x & 0xffffffffL
def bytereverse(x):
return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) |
(((x) >> 8) & 0x0000ff00) | ((x) >> 24) ))
def bufreverse(in_buf):
out_words = []
for i in range(0, len(in_buf), 4):
word = struct.unpack('@I', in_buf[i:i+4])[0]
out_words.append(struct.pack('@I', bytereverse(word)))
return ''.join(out_words)
def wordreverse(in_buf):
out_words = []
for i in range(0, len(in_buf), 4):
out_words.append(in_buf[i:i+4])
out_words.reverse()
return ''.join(out_words)
def calc_hdr_hash(blk_hdr):
hash1 = hashlib.sha256()
hash1.update(blk_hdr)
hash1_o = hash1.digest()
hash2 = hashlib.sha256()
hash2.update(hash1_o)
hash2_o = hash2.digest()
return hash2_o
def calc_hash_str(blk_hdr):
hash = calc_hdr_hash(blk_hdr)
hash = bufreverse(hash)
hash = wordreverse(hash)
hash_str = hash.encode('hex')
return hash_str
def get_blk_dt(blk_hdr):
members = struct.unpack("<I", blk_hdr[68:68+4])
nTime = members[0]
dt = datetime.datetime.fromtimestamp(nTime)
dt_ym = datetime.datetime(dt.year, dt.month, 1)
return (dt_ym, nTime)
def get_block_hashes(settings):
blkindex = []
f = open(settings['hashlist'], "r")
for line in f:
line = line.rstrip()
blkindex.append(line)
print("Read " + str(len(blkindex)) + " hashes")
return blkindex
-def mkblockset(blkindex):
+def mkblockmap(blkindex):
blkmap = {}
- for hash in blkindex:
- blkmap[hash] = True
+ for height,hash in enumerate(blkindex):
+ blkmap[hash] = height
return blkmap
-def copydata(settings, blkindex, blkset):
- inFn = 0
- inF = None
- outFn = 0
- outsz = 0
- outF = None
- outFname = None
- blkCount = 0
-
- lastDate = datetime.datetime(2000, 1, 1)
- highTS = 1408893517 - 315360000
- timestampSplit = False
- fileOutput = True
- setFileTime = False
- maxOutSz = settings['max_out_sz']
- if 'output' in settings:
- fileOutput = False
- if settings['file_timestamp'] != 0:
- setFileTime = True
- if settings['split_timestamp'] != 0:
- timestampSplit = True
-
- while True:
- if not inF:
- fname = "%s/blk%05d.dat" % (settings['input'], inFn)
- print("Input file" + fname)
- try:
- inF = open(fname, "rb")
- except IOError:
- print "Done"
- return
-
- inhdr = inF.read(8)
- if (not inhdr or (inhdr[0] == "\0")):
- inF.close()
- inF = None
- inFn = inFn + 1
- continue
-
- inMagic = inhdr[:4]
- if (inMagic != settings['netmagic']):
- print("Invalid magic:" + inMagic)
- return
- inLenLE = inhdr[4:]
- su = struct.unpack("<I", inLenLE)
- inLen = su[0]
- rawblock = inF.read(inLen)
- blk_hdr = rawblock[:80]
-
- hash_str = calc_hash_str(blk_hdr)
- if not hash_str in blkset:
- print("Skipping unknown block " + hash_str)
- continue
-
- if blkindex[blkCount] != hash_str:
- print("Out of order block.")
- print("Expected " + blkindex[blkCount])
- print("Got " + hash_str)
- sys.exit(1)
-
- if not fileOutput and ((outsz + inLen) > maxOutSz):
- outF.close()
- if setFileTime:
+# Block header and extent on disk
+BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size'])
+
+class BlockDataCopier:
+ def __init__(self, settings, blkindex, blkmap):
+ self.settings = settings
+ self.blkindex = blkindex
+ self.blkmap = blkmap
+
+ self.inFn = 0
+ self.inF = None
+ self.outFn = 0
+ self.outsz = 0
+ self.outF = None
+ self.outFname = None
+ self.blkCountIn = 0
+ self.blkCountOut = 0
+
+ self.lastDate = datetime.datetime(2000, 1, 1)
+ self.highTS = 1408893517 - 315360000
+ self.timestampSplit = False
+ self.fileOutput = True
+ self.setFileTime = False
+ self.maxOutSz = settings['max_out_sz']
+ if 'output' in settings:
+ self.fileOutput = False
+ if settings['file_timestamp'] != 0:
+ self.setFileTime = True
+ if settings['split_timestamp'] != 0:
+ self.timestampSplit = True
+ # Extents and cache for out-of-order blocks
+ self.blockExtents = {}
+ self.outOfOrderData = {}
+ self.outOfOrderSize = 0 # running total size for items in outOfOrderData
+
+ def writeBlock(self, inhdr, blk_hdr, rawblock):
+ if not self.fileOutput and ((self.outsz + self.inLen) > self.maxOutSz):
+ self.outF.close()
+ if self.setFileTime:
os.utime(outFname, (int(time.time()), highTS))
- outF = None
- outFname = None
- outFn = outFn + 1
- outsz = 0
+ self.outF = None
+ self.outFname = None
+ self.outFn = outFn + 1
+ self.outsz = 0
(blkDate, blkTS) = get_blk_dt(blk_hdr)
- if timestampSplit and (blkDate > lastDate):
+ if self.timestampSplit and (blkDate > self.lastDate):
print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
lastDate = blkDate
if outF:
outF.close()
if setFileTime:
os.utime(outFname, (int(time.time()), highTS))
- outF = None
- outFname = None
- outFn = outFn + 1
- outsz = 0
-
- if not outF:
- if fileOutput:
- outFname = settings['output_file']
+ self.outF = None
+ self.outFname = None
+ self.outFn = self.outFn + 1
+ self.outsz = 0
+
+ if not self.outF:
+ if self.fileOutput:
+ outFname = self.settings['output_file']
else:
- outFname = "%s/blk%05d.dat" % (settings['output'], outFn)
+ outFname = "%s/blk%05d.dat" % (self.settings['output'], outFn)
print("Output file" + outFname)
- outF = open(outFname, "wb")
-
- outF.write(inhdr)
- outF.write(rawblock)
- outsz = outsz + inLen + 8
-
- blkCount = blkCount + 1
- if blkTS > highTS:
- highTS = blkTS
-
- if (blkCount % 1000) == 0:
- print("Wrote " + str(blkCount) + " blocks")
+ self.outF = open(outFname, "wb")
+
+ self.outF.write(inhdr)
+ self.outF.write(blk_hdr)
+ self.outF.write(rawblock)
+ self.outsz = self.outsz + len(inhdr) + len(blk_hdr) + len(rawblock)
+
+ self.blkCountOut = self.blkCountOut + 1
+ if blkTS > self.highTS:
+ self.highTS = blkTS
+
+ if (self.blkCountOut % 1000) == 0:
+ print('%i blocks scanned, %i blocks written (of %i, %.1f%% complete)' %
+ (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex)))
+
+ def inFileName(self, fn):
+ return "%s/blk%05d.dat" % (self.settings['input'], fn)
+
+ def fetchBlock(self, extent):
+ '''Fetch block contents from disk given extents'''
+ with open(self.inFileName(extent.fn), "rb") as f:
+ f.seek(extent.offset)
+ return f.read(extent.size)
+
+ def copyOneBlock(self):
+ '''Find the next block to be written in the input, and copy it to the output.'''
+ extent = self.blockExtents.pop(self.blkCountOut)
+ if self.blkCountOut in self.outOfOrderData:
+ # If the data is cached, use it from memory and remove from the cache
+ rawblock = self.outOfOrderData.pop(self.blkCountOut)
+ self.outOfOrderSize -= len(rawblock)
+ else: # Otherwise look up data on disk
+ rawblock = self.fetchBlock(extent)
+
+ self.writeBlock(extent.inhdr, extent.blkhdr, rawblock)
+
+ def run(self):
+ while self.blkCountOut < len(self.blkindex):
+ if not self.inF:
+ fname = self.inFileName(self.inFn)
+ print("Input file" + fname)
+ try:
+ self.inF = open(fname, "rb")
+ except IOError:
+ print("Premature end of block data")
+ return
+
+ inhdr = self.inF.read(8)
+ if (not inhdr or (inhdr[0] == "\0")):
+ self.inF.close()
+ self.inF = None
+ self.inFn = self.inFn + 1
+ continue
+
+ inMagic = inhdr[:4]
+ if (inMagic != self.settings['netmagic']):
+ print("Invalid magic:" + inMagic)
+ return
+ inLenLE = inhdr[4:]
+ su = struct.unpack("<I", inLenLE)
+ inLen = su[0] - 80 # length without header
+ blk_hdr = self.inF.read(80)
+ inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)
+
+ hash_str = calc_hash_str(blk_hdr)
+ if not hash_str in blkmap:
+ print("Skipping unknown block " + hash_str)
+ self.inF.seek(inLen, os.SEEK_CUR)
+ continue
+
+ blkHeight = self.blkmap[hash_str]
+ self.blkCountIn += 1
+
+ if self.blkCountOut == blkHeight:
+ # If in-order block, just copy
+ rawblock = self.inF.read(inLen)
+ self.writeBlock(inhdr, blk_hdr, rawblock)
+
+ # See if we can catch up to prior out-of-order blocks
+ while self.blkCountOut in self.blockExtents:
+ self.copyOneBlock()
+
+ else: # If out-of-order, skip over block data for now
+ self.blockExtents[blkHeight] = inExtent
+ if self.outOfOrderSize < self.settings['out_of_order_cache_sz']:
+ # If there is space in the cache, read the data
+ # Reading the data in file sequence instead of seeking and fetching it later is preferred,
+ # but we don't want to fill up memory
+ self.outOfOrderData[blkHeight] = self.inF.read(inLen)
+ self.outOfOrderSize += inLen
+ else: # If no space in cache, seek forward
+ self.inF.seek(inLen, os.SEEK_CUR)
+
+ print("Done (%i blocks written)" % (self.blkCountOut))
if __name__ == '__main__':
if len(sys.argv) != 2:
- print "Usage: linearize-data.py CONFIG-FILE"
+ print("Usage: linearize-data.py CONFIG-FILE")
sys.exit(1)
f = open(sys.argv[1])
for line in f:
# skip comment lines
m = re.search('^\s*#', line)
if m:
continue
# parse key=value lines
m = re.search('^(\w+)\s*=\s*(\S.*)$', line)
if m is None:
continue
settings[m.group(1)] = m.group(2)
f.close()
if 'netmagic' not in settings:
settings['netmagic'] = 'f9beb4d9'
if 'input' not in settings:
settings['input'] = 'input'
if 'hashlist' not in settings:
settings['hashlist'] = 'hashlist.txt'
if 'file_timestamp' not in settings:
settings['file_timestamp'] = 0
if 'split_timestamp' not in settings:
settings['split_timestamp'] = 0
if 'max_out_sz' not in settings:
settings['max_out_sz'] = 1000L * 1000 * 1000
+ if 'out_of_order_cache_sz' not in settings:
+ settings['out_of_order_cache_sz'] = 100 * 1000 * 1000
settings['max_out_sz'] = long(settings['max_out_sz'])
settings['split_timestamp'] = int(settings['split_timestamp'])
settings['file_timestamp'] = int(settings['file_timestamp'])
settings['netmagic'] = settings['netmagic'].decode('hex')
+ settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz'])
if 'output_file' not in settings and 'output' not in settings:
print("Missing output file / directory")
sys.exit(1)
blkindex = get_block_hashes(settings)
- blkset = mkblockset(blkindex)
+ blkmap = mkblockmap(blkindex)
- if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset:
+ if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkmap:
print("not found")
else:
- copydata(settings, blkindex, blkset)
+ BlockDataCopier(settings, blkindex, blkmap).run()
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Apr 27, 11:19 (1 d, 11 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
5568570
Default Alt Text
(11 KB)
Attached To
rSTAGING Bitcoin ABC staging
Event Timeline
Log In to Comment