No OneTemporary
Actions

Size

11 KB

Subscribers

None

View Options

	diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg
	index 071345f23a..e0fef13886 100644
	--- a/contrib/linearize/example-linearize.cfg
	+++ b/contrib/linearize/example-linearize.cfg
	@@ -1,17 +1,19 @@

	# bitcoind RPC settings (linearize-hashes)
	rpcuser=someuser
	rpcpassword=somepassword
	host=127.0.0.1
	port=8332

	# bootstrap.dat hashlist settings (linearize-hashes)
	max_height=313000

	# bootstrap.dat input/output settings (linearize-data)
	netmagic=f9beb4d9
	input=/home/example/.bitcoin/blocks
	output_file=/home/example/Downloads/bootstrap.dat
	hashlist=hashlist.txt
	split_year=1

	+# Maxmimum size in bytes of out-of-order blocks cache in memory
	+out_of_order_cache_sz = 100000000
	diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py
	index 3b5d198c14..2dac3a614b 100755
	--- a/contrib/linearize/linearize-data.py
	+++ b/contrib/linearize/linearize-data.py
	@@ -1,237 +1,299 @@
	#!/usr/bin/python
	#
	# linearize-data.py: Construct a linear, no-fork version of the chain.
	#
	-# Copyright (c) 2013 The Bitcoin developers
	+# Copyright (c) 2013-2014 The Bitcoin developers
	# Distributed under the MIT/X11 software license, see the accompanying
	# file COPYING or http://www.opensource.org/licenses/mit-license.php.
	#

	+from __future__ import print_function, division
	import json
	import struct
	import re
	import os
	import base64
	import httplib
	import sys
	import hashlib
	import datetime
	import time
	+from collections import namedtuple

	settings = {}

	-
	def uint32(x):
	return x & 0xffffffffL

	def bytereverse(x):
	return uint32(( ((x) << 24) \| (((x) << 8) & 0x00ff0000) \|
	(((x) >> 8) & 0x0000ff00) \| ((x) >> 24) ))

	def bufreverse(in_buf):
	out_words = []
	for i in range(0, len(in_buf), 4):
	word = struct.unpack('@I', in_buf[i:i+4])[0]
	out_words.append(struct.pack('@I', bytereverse(word)))
	return ''.join(out_words)

	def wordreverse(in_buf):
	out_words = []
	for i in range(0, len(in_buf), 4):
	out_words.append(in_buf[i:i+4])
	out_words.reverse()
	return ''.join(out_words)

	def calc_hdr_hash(blk_hdr):
	hash1 = hashlib.sha256()
	hash1.update(blk_hdr)
	hash1_o = hash1.digest()

	hash2 = hashlib.sha256()
	hash2.update(hash1_o)
	hash2_o = hash2.digest()

	return hash2_o

	def calc_hash_str(blk_hdr):
	hash = calc_hdr_hash(blk_hdr)
	hash = bufreverse(hash)
	hash = wordreverse(hash)
	hash_str = hash.encode('hex')
	return hash_str

	def get_blk_dt(blk_hdr):
	members = struct.unpack("<I", blk_hdr[68:68+4])
	nTime = members[0]
	dt = datetime.datetime.fromtimestamp(nTime)
	dt_ym = datetime.datetime(dt.year, dt.month, 1)
	return (dt_ym, nTime)

	def get_block_hashes(settings):
	blkindex = []
	f = open(settings['hashlist'], "r")
	for line in f:
	line = line.rstrip()
	blkindex.append(line)

	print("Read " + str(len(blkindex)) + " hashes")

	return blkindex

	-def mkblockset(blkindex):
	+def mkblockmap(blkindex):
	blkmap = {}
	- for hash in blkindex:
	- blkmap[hash] = True
	+ for height,hash in enumerate(blkindex):
	+ blkmap[hash] = height
	return blkmap

	-def copydata(settings, blkindex, blkset):
	- inFn = 0
	- inF = None
	- outFn = 0
	- outsz = 0
	- outF = None
	- outFname = None
	- blkCount = 0
	-
	- lastDate = datetime.datetime(2000, 1, 1)
	- highTS = 1408893517 - 315360000
	- timestampSplit = False
	- fileOutput = True
	- setFileTime = False
	- maxOutSz = settings['max_out_sz']
	- if 'output' in settings:
	- fileOutput = False
	- if settings['file_timestamp'] != 0:
	- setFileTime = True
	- if settings['split_timestamp'] != 0:
	- timestampSplit = True
	-
	- while True:
	- if not inF:
	- fname = "%s/blk%05d.dat" % (settings['input'], inFn)
	- print("Input file" + fname)
	- try:
	- inF = open(fname, "rb")
	- except IOError:
	- print "Done"
	- return
	-
	- inhdr = inF.read(8)
	- if (not inhdr or (inhdr[0] == "\0")):
	- inF.close()
	- inF = None
	- inFn = inFn + 1
	- continue
	-
	- inMagic = inhdr[:4]
	- if (inMagic != settings['netmagic']):
	- print("Invalid magic:" + inMagic)
	- return
	- inLenLE = inhdr[4:]
	- su = struct.unpack("<I", inLenLE)
	- inLen = su[0]
	- rawblock = inF.read(inLen)
	- blk_hdr = rawblock[:80]
	-
	- hash_str = calc_hash_str(blk_hdr)
	- if not hash_str in blkset:
	- print("Skipping unknown block " + hash_str)
	- continue
	-
	- if blkindex[blkCount] != hash_str:
	- print("Out of order block.")
	- print("Expected " + blkindex[blkCount])
	- print("Got " + hash_str)
	- sys.exit(1)
	-
	- if not fileOutput and ((outsz + inLen) > maxOutSz):
	- outF.close()
	- if setFileTime:
	+# Block header and extent on disk
	+BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size'])
	+
	+class BlockDataCopier:
	+ def __init__(self, settings, blkindex, blkmap):
	+ self.settings = settings
	+ self.blkindex = blkindex
	+ self.blkmap = blkmap
	+
	+ self.inFn = 0
	+ self.inF = None
	+ self.outFn = 0
	+ self.outsz = 0
	+ self.outF = None
	+ self.outFname = None
	+ self.blkCountIn = 0
	+ self.blkCountOut = 0
	+
	+ self.lastDate = datetime.datetime(2000, 1, 1)
	+ self.highTS = 1408893517 - 315360000
	+ self.timestampSplit = False
	+ self.fileOutput = True
	+ self.setFileTime = False
	+ self.maxOutSz = settings['max_out_sz']
	+ if 'output' in settings:
	+ self.fileOutput = False
	+ if settings['file_timestamp'] != 0:
	+ self.setFileTime = True
	+ if settings['split_timestamp'] != 0:
	+ self.timestampSplit = True
	+ # Extents and cache for out-of-order blocks
	+ self.blockExtents = {}
	+ self.outOfOrderData = {}
	+ self.outOfOrderSize = 0 # running total size for items in outOfOrderData
	+
	+ def writeBlock(self, inhdr, blk_hdr, rawblock):
	+ if not self.fileOutput and ((self.outsz + self.inLen) > self.maxOutSz):
	+ self.outF.close()
	+ if self.setFileTime:
	os.utime(outFname, (int(time.time()), highTS))
	- outF = None
	- outFname = None
	- outFn = outFn + 1
	- outsz = 0
	+ self.outF = None
	+ self.outFname = None
	+ self.outFn = outFn + 1
	+ self.outsz = 0

	(blkDate, blkTS) = get_blk_dt(blk_hdr)
	- if timestampSplit and (blkDate > lastDate):
	+ if self.timestampSplit and (blkDate > self.lastDate):
	print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
	lastDate = blkDate
	if outF:
	outF.close()
	if setFileTime:
	os.utime(outFname, (int(time.time()), highTS))
	- outF = None
	- outFname = None
	- outFn = outFn + 1
	- outsz = 0
	-
	- if not outF:
	- if fileOutput:
	- outFname = settings['output_file']
	+ self.outF = None
	+ self.outFname = None
	+ self.outFn = self.outFn + 1
	+ self.outsz = 0
	+
	+ if not self.outF:
	+ if self.fileOutput:
	+ outFname = self.settings['output_file']
	else:
	- outFname = "%s/blk%05d.dat" % (settings['output'], outFn)
	+ outFname = "%s/blk%05d.dat" % (self.settings['output'], outFn)
	print("Output file" + outFname)
	- outF = open(outFname, "wb")
	-
	- outF.write(inhdr)
	- outF.write(rawblock)
	- outsz = outsz + inLen + 8
	-
	- blkCount = blkCount + 1
	- if blkTS > highTS:
	- highTS = blkTS
	-
	- if (blkCount % 1000) == 0:
	- print("Wrote " + str(blkCount) + " blocks")
	+ self.outF = open(outFname, "wb")
	+
	+ self.outF.write(inhdr)
	+ self.outF.write(blk_hdr)
	+ self.outF.write(rawblock)
	+ self.outsz = self.outsz + len(inhdr) + len(blk_hdr) + len(rawblock)
	+
	+ self.blkCountOut = self.blkCountOut + 1
	+ if blkTS > self.highTS:
	+ self.highTS = blkTS
	+
	+ if (self.blkCountOut % 1000) == 0:
	+ print('%i blocks scanned, %i blocks written (of %i, %.1f%% complete)' %
	+ (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex)))
	+
	+ def inFileName(self, fn):
	+ return "%s/blk%05d.dat" % (self.settings['input'], fn)
	+
	+ def fetchBlock(self, extent):
	+ '''Fetch block contents from disk given extents'''
	+ with open(self.inFileName(extent.fn), "rb") as f:
	+ f.seek(extent.offset)
	+ return f.read(extent.size)
	+
	+ def copyOneBlock(self):
	+ '''Find the next block to be written in the input, and copy it to the output.'''
	+ extent = self.blockExtents.pop(self.blkCountOut)
	+ if self.blkCountOut in self.outOfOrderData:
	+ # If the data is cached, use it from memory and remove from the cache
	+ rawblock = self.outOfOrderData.pop(self.blkCountOut)
	+ self.outOfOrderSize -= len(rawblock)
	+ else: # Otherwise look up data on disk
	+ rawblock = self.fetchBlock(extent)
	+
	+ self.writeBlock(extent.inhdr, extent.blkhdr, rawblock)
	+
	+ def run(self):
	+ while self.blkCountOut < len(self.blkindex):
	+ if not self.inF:
	+ fname = self.inFileName(self.inFn)
	+ print("Input file" + fname)
	+ try:
	+ self.inF = open(fname, "rb")
	+ except IOError:
	+ print("Premature end of block data")
	+ return
	+
	+ inhdr = self.inF.read(8)
	+ if (not inhdr or (inhdr[0] == "\0")):
	+ self.inF.close()
	+ self.inF = None
	+ self.inFn = self.inFn + 1
	+ continue
	+
	+ inMagic = inhdr[:4]
	+ if (inMagic != self.settings['netmagic']):
	+ print("Invalid magic:" + inMagic)
	+ return
	+ inLenLE = inhdr[4:]
	+ su = struct.unpack("<I", inLenLE)
	+ inLen = su[0] - 80 # length without header
	+ blk_hdr = self.inF.read(80)
	+ inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)
	+
	+ hash_str = calc_hash_str(blk_hdr)
	+ if not hash_str in blkmap:
	+ print("Skipping unknown block " + hash_str)
	+ self.inF.seek(inLen, os.SEEK_CUR)
	+ continue
	+
	+ blkHeight = self.blkmap[hash_str]
	+ self.blkCountIn += 1
	+
	+ if self.blkCountOut == blkHeight:
	+ # If in-order block, just copy
	+ rawblock = self.inF.read(inLen)
	+ self.writeBlock(inhdr, blk_hdr, rawblock)
	+
	+ # See if we can catch up to prior out-of-order blocks
	+ while self.blkCountOut in self.blockExtents:
	+ self.copyOneBlock()
	+
	+ else: # If out-of-order, skip over block data for now
	+ self.blockExtents[blkHeight] = inExtent
	+ if self.outOfOrderSize < self.settings['out_of_order_cache_sz']:
	+ # If there is space in the cache, read the data
	+ # Reading the data in file sequence instead of seeking and fetching it later is preferred,
	+ # but we don't want to fill up memory
	+ self.outOfOrderData[blkHeight] = self.inF.read(inLen)
	+ self.outOfOrderSize += inLen
	+ else: # If no space in cache, seek forward
	+ self.inF.seek(inLen, os.SEEK_CUR)
	+
	+ print("Done (%i blocks written)" % (self.blkCountOut))

	if __name__ == '__main__':
	if len(sys.argv) != 2:
	- print "Usage: linearize-data.py CONFIG-FILE"
	+ print("Usage: linearize-data.py CONFIG-FILE")
	sys.exit(1)

	f = open(sys.argv[1])
	for line in f:
	# skip comment lines
	m = re.search('^\s*#', line)
	if m:
	continue

	# parse key=value lines
	m = re.search('^(\w+)\s=\s(\S.*)$', line)
	if m is None:
	continue
	settings[m.group(1)] = m.group(2)
	f.close()

	if 'netmagic' not in settings:
	settings['netmagic'] = 'f9beb4d9'
	if 'input' not in settings:
	settings['input'] = 'input'
	if 'hashlist' not in settings:
	settings['hashlist'] = 'hashlist.txt'
	if 'file_timestamp' not in settings:
	settings['file_timestamp'] = 0
	if 'split_timestamp' not in settings:
	settings['split_timestamp'] = 0
	if 'max_out_sz' not in settings:
	settings['max_out_sz'] = 1000L * 1000 * 1000
	+ if 'out_of_order_cache_sz' not in settings:
	+ settings['out_of_order_cache_sz'] = 100 * 1000 * 1000

	settings['max_out_sz'] = long(settings['max_out_sz'])
	settings['split_timestamp'] = int(settings['split_timestamp'])
	settings['file_timestamp'] = int(settings['file_timestamp'])
	settings['netmagic'] = settings['netmagic'].decode('hex')
	+ settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz'])

	if 'output_file' not in settings and 'output' not in settings:
	print("Missing output file / directory")
	sys.exit(1)

	blkindex = get_block_hashes(settings)
	- blkset = mkblockset(blkindex)
	+ blkmap = mkblockmap(blkindex)

	- if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset:
	+ if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkmap:
	print("not found")
	else:
	- copydata(settings, blkindex, blkset)
	+ BlockDataCopier(settings, blkindex, blkmap).run()

File Metadata

Mime Type: text/x-diff
Expires: Sun, Apr 27, 11:19 (1 d, 11 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 5568570
Default Alt Text: (11 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions