Changeset View
Changeset View
Standalone View
Standalone View
contrib/seeds/makeseeds.py
Show All 10 Lines | |||||
import sys | import sys | ||||
from typing import Dict, List, Union | from typing import Dict, List, Union | ||||
import dns.resolver | import dns.resolver | ||||
# Cap on the number of seeds kept per network type (passed to `filterbyasn`
# as `max_per_net`).
NSEEDS = 512

# Cap on the number of seeds accepted from a single ASN, per network type.
MAX_SEEDS_PER_ASN = {
    "ipv4": 6,
    "ipv6": 10,
}

# Nodes reporting fewer blocks than this are discarded.
MIN_BLOCKS = 760000

# `address:port` patterns for the first column of a seeder dump line.
# IPv4: group 1 is the dotted address, groups 2-5 the octets, group 6 the port.
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
# IPv6: group 1 is the bracketed address, group 2 the port.
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
# Onion: group 1 is the 16-character base32 host plus `.onion`, group 2 the port.
PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$")

# Used to only select nodes with a user agent string compatible with the
# eCash network. The version separators are escaped so that only a literal
# "." is accepted between the version components.
PATTERN_AGENT = re.compile(r"^(/Bitcoin ABC:0\.(26|27)\.(\d+)\(.+\)/)")
def parseline(line: str) -> Union[dict, None]:
    """Parse a line from `seeds_main.txt` into a dictionary of details.

    The line is split on whitespace. Field 0 is the `address:port` endpoint,
    field 1 the result flag, field 2 the last-success timestamp, field 7 the
    uptime percentage, field 8 the block count, field 9 the service flags
    (hex), field 10 the protocol version and fields 11+ the user agent.

    Returns `None` if the line has fewer than 12 fields, if the address is
    not a usable IPv4 / IPv6 / onion endpoint, or if the line is flagged as a
    bad result. A numeric field that cannot be converted raises `ValueError`.
    """
    sline = line.split()
    # The user agent sits at index 11, so 12 fields are the minimum; anything
    # shorter would fail on `sline[11]` below.
    if len(sline) < 12:
        return None
    # The user agent is at the end of the line. It may contain spaces, so
    # everything from index 11 onwards is folded back into a single field.
    sline[11:] = [" ".join(sline[11:])]

    sortkey = None
    ip = None
    m = PATTERN_IPV4.match(sline[0])
    if m is not None:
        # IPv4 sanity check: every octet must fit in a byte, and the
        # all-zero address is rejected.
        ip = 0
        for group in range(2, 6):
            octet = int(m.group(group))
            if octet > 255:
                return None
            ip = (ip << 8) | octet
        if ip == 0:
            return None
        net = "ipv4"
        sortkey = ip
        ipstr = m.group(1)
        port = int(m.group(6))
    else:
        m = PATTERN_IPV6.match(sline[0])
        if m is not None:
            net = "ipv6"
            # Not interested in the all-zero address "::".
            if m.group(1) == "::":
                return None
            ipstr = m.group(1)
            # XXX parse IPv6 into number, could use name_to_ipv6 from
            # generate-seeds
            sortkey = ipstr
            port = int(m.group(2))
        else:
            m = PATTERN_ONION.match(sline[0])
            if m is None:
                return None
            net = "onion"
            ipstr = sortkey = m.group(1)
            port = int(m.group(2))

    # Skip bad results. The fields are strings, so the flag has to be
    # compared against "0" rather than the integer 0.
    if sline[1] == "0":
        return None
    # Extract uptime % (the last character of the field is dropped before
    # conversion; presumably the "%" sign).
    uptime30 = float(sline[7][:-1])
    # Extract Unix timestamp of last success.
    lastsuccess = int(sline[2])
    # Extract protocol version.
    version = int(sline[10])
    # Extract user agent, dropping its first and last characters (presumably
    # the surrounding quotes).
    agent = sline[11][1:-1]
    # Extract service flags.
    service = int(sline[9], 16)
    # Extract blocks.
    blocks = int(sline[8])
    # Construct result.
    return {
        "net": net,
        "ip": ipstr,
        "port": port,
        "ipnum": ip,
        "uptime": uptime30,
        "lastsuccess": lastsuccess,
        "version": version,
        "agent": agent,
        "service": service,
        "blocks": blocks,
        "sortkey": sortkey,
    }
def dedup(ips: List[Dict]) -> List[Dict]:
    """Collapse entries of `ips` that share the same address and port.

    When several entries collide, the last one seen is kept; it takes the
    position at which that (address, port) pair first appeared.
    """
    unique = {(entry["ip"], entry["port"]): entry for entry in ips}
    return [*unique.values()]
def filtermultiport(ips: List[Dict]) -> List[Dict]:
    """Drop every host that appears more than once in `ips`.

    Hosts are identified by their `sortkey`; only entries whose key occurs
    exactly once are returned, in their original order.
    """
    occurrences = collections.Counter(node["sortkey"] for node in ips)
    return [node for node in ips if occurrences[node["sortkey"]] == 1]
def lookup_asn(net: str, ip: str) -> Union[int, None]:
    """Look up the ASN for an `ip` address by querying cymru.com
    on network `net` (e.g. ipv4 or ipv6).

    For ipv4 the reversed octets are queried under `origin.asn.cymru.com`;
    for anything else the reversed nibbles of the first four 16-bit groups of
    the fully expanded IPv6 address are queried under
    `origin6.asn.cymru.com`.

    Returns an integer ASN, or None if it could not be found. Failures are
    reported on stderr and never raised.
    """
    # Only this function needs it, so the import is kept local.
    import ipaddress

    try:
        if net == "ipv4":
            labels = ip.split(".")
            zone = "origin"
        else:
            # http://www.team-cymru.com/IP-ASN-mapping.html
            # Expand the address first so that "::" compression and omitted
            # leading zeros cannot shift the nibbles:
            # 2001:4860:b002:23::68 -> 2001:4860:b002:0023:0000:...
            groups = ipaddress.IPv6Address(ip).exploded.split(":")
            # Use the nibbles of the first 4 groups:
            # 2.0.0.1.4.8.6.0.b.0.0.2.0.0.2.3
            labels = list("".join(groups[:4]))
            zone = "origin6"
        name = ".".join(reversed(labels)) + "." + zone + ".asn.cymru.com"
        answer = dns.resolver.query(name, "TXT").response.answer
        # The ASN is the first space-separated token inside the quoted text
        # of the first answer record.
        quoted = answer[0].to_text().split('"')[1]
        return int(quoted.split(" ")[0])
    except Exception:
        # Deliberately best-effort: whatever went wrong (malformed address,
        # DNS failure, unexpected answer), the caller just skips this ip.
        sys.stderr.write('ERR: Could not resolve ASN for "' + ip + '"\n')
        return None
# Based on Greg Maxwell's seed_filter.py | # Based on Greg Maxwell's seed_filter.py | ||||
def filterbyasn(ips: List[Dict], max_per_asn: Dict, | def filterbyasn(ips: List[Dict], max_per_asn: Dict, max_per_net: int) -> List[Dict]: | ||||
max_per_net: int) -> List[Dict]: | |||||
""" Prunes `ips` by | """Prunes `ips` by | ||||
(a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and | (a) trimming ips to have at most `max_per_net` ips from each net (e.g. ipv4, ipv6); and | ||||
(b) trimming ips to have at most `max_per_asn` ips from each asn in each net. | (b) trimming ips to have at most `max_per_asn` ips from each asn in each net. | ||||
""" | """ | ||||
# Sift out ips by type | # Sift out ips by type | ||||
ips_ipv46 = [ip for ip in ips if ip['net'] in ['ipv4', 'ipv6']] | ips_ipv46 = [ip for ip in ips if ip["net"] in ["ipv4", "ipv6"]] | ||||
ips_onion = [ip for ip in ips if ip['net'] == 'onion'] | ips_onion = [ip for ip in ips if ip["net"] == "onion"] | ||||
# Filter IPv46 by ASN, and limit to max_per_net per network | # Filter IPv46 by ASN, and limit to max_per_net per network | ||||
result = [] | result = [] | ||||
net_count: Dict[str, int] = collections.defaultdict(int) | net_count: Dict[str, int] = collections.defaultdict(int) | ||||
asn_count: Dict[int, int] = collections.defaultdict(int) | asn_count: Dict[int, int] = collections.defaultdict(int) | ||||
for i, ip in enumerate(ips_ipv46): | for i, ip in enumerate(ips_ipv46): | ||||
if i % 10 == 0: | if i % 10 == 0: | ||||
# give progress update | # give progress update | ||||
print( | print( | ||||
f"{i:6d}/{len(ips_ipv46)} [{100*i/len(ips_ipv46):04.1f}%]\r", | f"{i:6d}/{len(ips_ipv46)} [{100*i/len(ips_ipv46):04.1f}%]\r", | ||||
file=sys.stderr, | file=sys.stderr, | ||||
end='', | end="", | ||||
flush=True) | flush=True, | ||||
) | |||||
if net_count[ip['net']] == max_per_net: | if net_count[ip["net"]] == max_per_net: | ||||
# do not add this ip as we already too many | # do not add this ip as we already too many | ||||
# ips from this network | # ips from this network | ||||
continue | continue | ||||
asn = lookup_asn(ip['net'], ip['ip']) | asn = lookup_asn(ip["net"], ip["ip"]) | ||||
if asn is None or asn_count[asn] == max_per_asn[ip['net']]: | if asn is None or asn_count[asn] == max_per_asn[ip["net"]]: | ||||
# do not add this ip as we already have too many | # do not add this ip as we already have too many | ||||
# ips from this ASN on this network | # ips from this ASN on this network | ||||
continue | continue | ||||
asn_count[asn] += 1 | asn_count[asn] += 1 | ||||
net_count[ip['net']] += 1 | net_count[ip["net"]] += 1 | ||||
result.append(ip) | result.append(ip) | ||||
# Add back Onions (up to max_per_net) | # Add back Onions (up to max_per_net) | ||||
result.extend(ips_onion[0:max_per_net]) | result.extend(ips_onion[0:max_per_net]) | ||||
return result | return result | ||||
def ip_stats(ips: List[Dict]) -> str:
    """Return the ipv4 / ipv6 / onion entry counts of `ips` as three
    right-aligned, 6-wide columns. `None` entries are ignored.
    """
    per_net = collections.Counter(
        entry["net"] for entry in ips if entry is not None
    )
    ipv4, ipv6, onion = (per_net[name] for name in ("ipv4", "ipv6", "onion"))
    return f"{ipv4:6d} {ipv6:6d} {onion:6d}"
def main():
    """Read seeder dump lines from stdin, filter them and print the seeds.

    Each filtering pass writes the remaining ipv4 / ipv6 / onion counts to
    stderr. The surviving seeds are written to stdout, one `address:port`
    per line, with IPv6 addresses wrapped in brackets.
    """

    def report(current: List, stage: str) -> None:
        # One stats line per pass: the three counts followed by the pass name.
        print(f"{ip_stats(current):s} {stage}", file=sys.stderr)

    lines = sys.stdin.readlines()
    ips = [parseline(line) for line in lines]

    # Header in reverse video. The column titles use the same 6-wide,
    # right-aligned layout as `ip_stats`, and the bar is padded to 71
    # characters so it spans the longest stats line printed below.
    header = f"{'IPv4':>6} {'IPv6':>6} {'Onion':>6} Pass"
    print(f"\x1b[7m{header:<71}\x1b[0m", file=sys.stderr)

    report(ips, "Initial")
    # Skip entries with invalid address.
    ips = [ip for ip in ips if ip is not None]
    report(ips, "Skip entries with invalid address")
    # Skip duplicates (in case multiple seeds files were concatenated)
    ips = dedup(ips)
    report(ips, "After removing duplicates")
    # Enforce minimal number of blocks.
    ips = [ip for ip in ips if ip["blocks"] >= MIN_BLOCKS]
    report(ips, "Enforce minimal number of blocks")
    # Require service bit 1.
    ips = [ip for ip in ips if (ip["service"] & 1) == 1]
    report(ips, "Require service bit 1")
    # Require more than 50% 30-day uptime for clearnet, more than 10% for
    # onion (the comparison is strict).
    req_uptime = {
        "ipv4": 50,
        "ipv6": 50,
        "onion": 10,
    }
    ips = [ip for ip in ips if ip["uptime"] > req_uptime[ip["net"]]]
    report(ips, "Require minimum uptime")
    # Require a known and recent user agent.
    ips = [ip for ip in ips if PATTERN_AGENT.match(ip["agent"])]
    report(ips, "Require a known and recent user agent")
    # Sort by availability (and use last success as tie breaker)
    ips.sort(key=lambda x: (x["uptime"], x["lastsuccess"], x["ip"]), reverse=True)
    # Filter out hosts with multiple bitcoin ports, these are likely abusive
    ips = filtermultiport(ips)
    report(ips, "Filter out hosts with multiple bitcoin ports")
    # Look up ASNs and limit results, both per ASN and globally.
    ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
    report(ips, "Look up ASNs and limit results per ASN and per net")
    # Sort the results by IP address (for deterministic output).
    ips.sort(key=lambda x: (x["net"], x["sortkey"]))
    for ip in ips:
        if ip["net"] == "ipv6":
            print(f"[{ip['ip']}]:{ip['port']}")
        else:
            print(f"{ip['ip']}:{ip['port']}")


if __name__ == "__main__":
    main()