diff --git a/contrib/devtools/copyright_header.py b/contrib/devtools/copyright_header.py index 77d555428..d5606d14f 100755 --- a/contrib/devtools/copyright_header.py +++ b/contrib/devtools/copyright_header.py @@ -1,664 +1,666 @@ #!/usr/bin/env python3 # Copyright (c) 2016 The Bitcoin Core developers # Copyright (c) 2017 The Bitcoin developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. import re import fnmatch import sys import subprocess import datetime import os ################################################################################ # file filtering ################################################################################ EXCLUDE = [ # libsecp256k1: 'src/secp256k1/include/secp256k1.h', 'src/secp256k1/include/secp256k1_ecdh.h', 'src/secp256k1/include/secp256k1_recovery.h', 'src/secp256k1/include/secp256k1_schnorr.h', 'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.c', 'src/secp256k1/src/java/org_bitcoin_NativeSecp256k1.h', 'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.c', 'src/secp256k1/src/java/org_bitcoin_Secp256k1Context.h', # auto generated: 'src/univalue/lib/univalue_escapes.h', 'src/qt/bitcoinstrings.cpp', 'src/chainparamsseeds.h', # other external copyrights: 'src/tinyformat.h', 'src/leveldb/util/env_win.cc', 'src/crypto/ctaes/bench.c', 'test/functional/test_framework/bignum.py', # python init: '*__init__.py', ] EXCLUDE_COMPILED = re.compile( '|'.join([fnmatch.translate(m) for m in EXCLUDE])) INCLUDE = ['*.h', '*.cpp', '*.cc', '*.c', '*.py'] INCLUDE_COMPILED = re.compile( '|'.join([fnmatch.translate(m) for m in INCLUDE])) def applies_to_file(filename): return ((EXCLUDE_COMPILED.match(filename) is None) and (INCLUDE_COMPILED.match(filename) is not None)) ################################################################################ # obtain list of files in repo according to INCLUDE and EXCLUDE ################################################################################ GIT_LS_CMD = 'git ls-files' def call_git_ls(): out = subprocess.check_output(GIT_LS_CMD.split(' ')) return [f for f in out.decode("utf-8").split('\n') if f != ''] def get_filenames_to_examine(): filenames = call_git_ls() return sorted([filename for filename in filenames if applies_to_file(filename)]) ################################################################################ # define and compile regexes for the patterns we are looking for ################################################################################ COPYRIGHT_WITH_C = 'Copyright \(c\)' COPYRIGHT_WITHOUT_C = 'Copyright' -ANY_COPYRIGHT_STYLE = '(%s|%s)' % (COPYRIGHT_WITH_C, COPYRIGHT_WITHOUT_C) +ANY_COPYRIGHT_STYLE = '({}|{})'.format(COPYRIGHT_WITH_C, COPYRIGHT_WITHOUT_C) YEAR = "20[0-9][0-9]" -YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR) -YEAR_LIST = '(%s)(, %s)+' % (YEAR, YEAR) -ANY_YEAR_STYLE = '(%s|%s)' % (YEAR_RANGE, YEAR_LIST) -ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE = ("%s %s" % (ANY_COPYRIGHT_STYLE, - ANY_YEAR_STYLE)) +YEAR_RANGE = '({})(-{})?'.format(YEAR, YEAR) +YEAR_LIST = '({})(, {})+'.format(YEAR, YEAR) +ANY_YEAR_STYLE = '({}|{})'.format(YEAR_RANGE, YEAR_LIST) +ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE = ( + "{} {}".format(ANY_COPYRIGHT_STYLE, ANY_YEAR_STYLE)) ANY_COPYRIGHT_COMPILED = re.compile(ANY_COPYRIGHT_STYLE_OR_YEAR_STYLE) def compile_copyright_regex(copyright_style, year_style, name): - return re.compile('%s %s %s' % (copyright_style, year_style, name)) + return re.compile('{} {} {}'.format(copyright_style, year_style, name)) EXPECTED_HOLDER_NAMES = [ "Satoshi Nakamoto\n", "The Bitcoin Core developers\n", "The Bitcoin Core developers \n", "Bitcoin Core Developers\n", "the Bitcoin Core developers\n", "The Bitcoin developers\n", "The LevelDB Authors\. All rights reserved\.\n", "BitPay Inc\.\n", "BitPay, Inc\.\n", "University of Illinois at Urbana-Champaign\.\n", "MarcoFalke\n", "Pieter Wuille\n", "Pieter Wuille +\*\n", "Pieter Wuille, Gregory Maxwell +\*\n", "Pieter Wuille, Andrew Poelstra +\*\n", "Andrew Poelstra +\*\n", "Wladimir J. van der Laan\n", "Jeff Garzik\n", "Diederik Huys, Pieter Wuille +\*\n", "Thomas Daede, Cory Fields +\*\n", "Jan-Klaas Kollhof\n", "Sam Rushing\n", "ArtForz -- public domain half-a-node\n", "Amaury SÉCHET\n", "Jeremy Rubin\n", ] DOMINANT_STYLE_COMPILED = {} YEAR_LIST_STYLE_COMPILED = {} WITHOUT_C_STYLE_COMPILED = {} for holder_name in EXPECTED_HOLDER_NAMES: DOMINANT_STYLE_COMPILED[holder_name] = ( compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_RANGE, holder_name)) YEAR_LIST_STYLE_COMPILED[holder_name] = ( compile_copyright_regex(COPYRIGHT_WITH_C, YEAR_LIST, holder_name)) WITHOUT_C_STYLE_COMPILED[holder_name] = ( compile_copyright_regex(COPYRIGHT_WITHOUT_C, ANY_YEAR_STYLE, holder_name)) ################################################################################ # search file contents for copyright message of particular category ################################################################################ def get_count_of_copyrights_of_any_style_any_holder(contents): return len(ANY_COPYRIGHT_COMPILED.findall(contents)) def file_has_dominant_style_copyright_for_holder(contents, holder_name): match = DOMINANT_STYLE_COMPILED[holder_name].search(contents) return match is not None def file_has_year_list_style_copyright_for_holder(contents, holder_name): match = YEAR_LIST_STYLE_COMPILED[holder_name].search(contents) return match is not None def file_has_without_c_style_copyright_for_holder(contents, holder_name): match = WITHOUT_C_STYLE_COMPILED[holder_name].search(contents) return match is not None ################################################################################ # get file info ################################################################################ def read_file(filename): return open(os.path.abspath(filename), 'r').read() def gather_file_info(filename): info = {} info['filename'] = filename c = read_file(filename) info['contents'] = c info['all_copyrights'] = get_count_of_copyrights_of_any_style_any_holder(c) info['classified_copyrights'] = 0 info['dominant_style'] = {} info['year_list_style'] = {} info['without_c_style'] = {} for holder_name in EXPECTED_HOLDER_NAMES: has_dominant_style = ( file_has_dominant_style_copyright_for_holder(c, holder_name)) has_year_list_style = ( file_has_year_list_style_copyright_for_holder(c, holder_name)) has_without_c_style = ( file_has_without_c_style_copyright_for_holder(c, holder_name)) info['dominant_style'][holder_name] = has_dominant_style info['year_list_style'][holder_name] = has_year_list_style info['without_c_style'][holder_name] = has_without_c_style if has_dominant_style or has_year_list_style or has_without_c_style: info['classified_copyrights'] = info['classified_copyrights'] + 1 return info ################################################################################ # report execution ################################################################################ SEPARATOR = '-'.join(['' for _ in range(80)]) def print_filenames(filenames, verbose): if not verbose: return for filename in filenames: - print("\t%s" % filename) + print("\t{}".format(filename)) def print_report(file_infos, verbose): print(SEPARATOR) examined = [i['filename'] for i in file_infos] - print("%d files examined according to INCLUDE and EXCLUDE fnmatch rules" % - len(examined)) + print("{} files examined according to INCLUDE and EXCLUDE fnmatch rules".format( + len(examined))) print_filenames(examined, verbose) print(SEPARATOR) print('') zero_copyrights = [i['filename'] for i in file_infos if i['all_copyrights'] == 0] - print("%4d with zero copyrights" % len(zero_copyrights)) + print("{:4d} with zero copyrights".format(len(zero_copyrights))) print_filenames(zero_copyrights, verbose) one_copyright = [i['filename'] for i in file_infos if i['all_copyrights'] == 1] - print("%4d with one copyright" % len(one_copyright)) + print("{:4d} with one copyright".format(len(one_copyright))) print_filenames(one_copyright, verbose) two_copyrights = [i['filename'] for i in file_infos if i['all_copyrights'] == 2] - print("%4d with two copyrights" % len(two_copyrights)) + print("{:4d} with two copyrights".format(len(two_copyrights))) print_filenames(two_copyrights, verbose) three_copyrights = [i['filename'] for i in file_infos if i['all_copyrights'] == 3] - print("%4d with three copyrights" % len(three_copyrights)) + print("{:4d} with three copyrights".format(len(three_copyrights))) print_filenames(three_copyrights, verbose) four_or_more_copyrights = [i['filename'] for i in file_infos if i['all_copyrights'] >= 4] - print("%4d with four or more copyrights" % len(four_or_more_copyrights)) + print("{:4d} with four or more copyrights".format( + len(four_or_more_copyrights))) print_filenames(four_or_more_copyrights, verbose) print('') print(SEPARATOR) print('Copyrights with dominant style:\ne.g. "Copyright (c)" and ' '"" or "-":\n') for holder_name in EXPECTED_HOLDER_NAMES: dominant_style = [i['filename'] for i in file_infos if i['dominant_style'][holder_name]] if len(dominant_style) > 0: - print("%4d with '%s'" % (len(dominant_style), - holder_name.replace('\n', '\\n'))) + print("{:4d} with '{}'".format( + len(dominant_style), holder_name.replace('\n', '\\n'))) print_filenames(dominant_style, verbose) print('') print(SEPARATOR) print('Copyrights with year list style:\ne.g. "Copyright (c)" and ' '", , ...":\n') for holder_name in EXPECTED_HOLDER_NAMES: year_list_style = [i['filename'] for i in file_infos if i['year_list_style'][holder_name]] if len(year_list_style) > 0: - print("%4d with '%s'" % (len(year_list_style), - holder_name.replace('\n', '\\n'))) + print("{:4d} with '{}'".format( + len(year_list_style), holder_name.replace('\n', '\\n'))) print_filenames(year_list_style, verbose) print('') print(SEPARATOR) print('Copyrights with no "(c)" style:\ne.g. "Copyright" and "" or ' '"-":\n') for holder_name in EXPECTED_HOLDER_NAMES: without_c_style = [i['filename'] for i in file_infos if i['without_c_style'][holder_name]] if len(without_c_style) > 0: - print("%4d with '%s'" % (len(without_c_style), - holder_name.replace('\n', '\\n'))) + print("{:4d} with '{}'".format( + len(without_c_style), holder_name.replace('\n', '\\n'))) print_filenames(without_c_style, verbose) print('') print(SEPARATOR) unclassified_copyrights = [i['filename'] for i in file_infos if i['classified_copyrights'] < i['all_copyrights']] - print("%d with unexpected copyright holder names" % - len(unclassified_copyrights)) + print("{} with unexpected copyright holder names".format( + len(unclassified_copyrights))) print_filenames(unclassified_copyrights, verbose) print(SEPARATOR) def exec_report(base_directory, verbose): original_cwd = os.getcwd() os.chdir(base_directory) filenames = get_filenames_to_examine() file_infos = [gather_file_info(f) for f in filenames] print_report(file_infos, verbose) os.chdir(original_cwd) ################################################################################ # report cmd ################################################################################ REPORT_USAGE = """ Produces a report of all copyright header notices found inside the source files of a repository. Usage: $ ./copyright_header.py report [verbose] Arguments: - The base directory of a bitcoin source code repository. [verbose] - Includes a list of every file of each subcategory in the report. """ def report_cmd(argv): if len(argv) == 2: sys.exit(REPORT_USAGE) base_directory = argv[2] if not os.path.exists(base_directory): - sys.exit("*** bad : %s" % base_directory) + sys.exit("*** bad : {}".format(base_directory)) if len(argv) == 3: verbose = False elif argv[3] == 'verbose': verbose = True else: - sys.exit("*** unknown argument: %s" % argv[2]) + sys.exit("*** unknown argument: {}".format(argv[2])) exec_report(base_directory, verbose) ################################################################################ # query git for year of last change ################################################################################ -GIT_LOG_CMD = "git log --pretty=format:%%ai %s" +GIT_LOG_CMD = "git log --pretty=format:%ai {}" def call_git_log(filename): - out = subprocess.check_output((GIT_LOG_CMD % filename).split(' ')) + out = subprocess.check_output((GIT_LOG_CMD.format(filename)).split(' ')) return out.decode("utf-8").split('\n') def get_git_change_years(filename): git_log_lines = call_git_log(filename) if len(git_log_lines) == 0: return [datetime.date.today().year] # timestamp is in ISO 8601 format. e.g. "2016-09-05 14:25:32 -0600" return [line.split(' ')[0].split('-')[0] for line in git_log_lines] def get_most_recent_git_change_year(filename): return max(get_git_change_years(filename)) ################################################################################ # read and write to file ################################################################################ def read_file_lines(filename): f = open(os.path.abspath(filename), 'r') file_lines = f.readlines() f.close() return file_lines def write_file_lines(filename, file_lines): f = open(os.path.abspath(filename), 'w') f.write(''.join(file_lines)) f.close() ################################################################################ # update header years execution ################################################################################ COPYRIGHT = 'Copyright \(c\)' YEAR = "20[0-9][0-9]" -YEAR_RANGE = '(%s)(-%s)?' % (YEAR, YEAR) +YEAR_RANGE = '({})(-{})?'.format(YEAR, YEAR) HOLDER = 'The Bitcoin developers' UPDATEABLE_LINE_COMPILED = re.compile( ' '.join([COPYRIGHT, YEAR_RANGE, HOLDER])) def get_updatable_copyright_line(file_lines): index = 0 for line in file_lines: if UPDATEABLE_LINE_COMPILED.search(line) is not None: return index, line index = index + 1 return None, None def parse_year_range(year_range): year_split = year_range.split('-') start_year = year_split[0] if len(year_split) == 1: return start_year, start_year return start_year, year_split[1] def year_range_to_str(start_year, end_year): if start_year == end_year: return start_year - return "%s-%s" % (start_year, end_year) + return "{}-{}".format(start_year, end_year) def create_updated_copyright_line(line, last_git_change_year): copyright_splitter = 'Copyright (c) ' copyright_split = line.split(copyright_splitter) # Preserve characters on line that are ahead of the start of the copyright # notice - they are part of the comment block and vary from file-to-file. before_copyright = copyright_split[0] after_copyright = copyright_split[1] space_split = after_copyright.split(' ') year_range = space_split[0] start_year, end_year = parse_year_range(year_range) if end_year == last_git_change_year: return line return (before_copyright + copyright_splitter + year_range_to_str(start_year, last_git_change_year) + ' ' + ' '.join(space_split[1:])) def update_updatable_copyright(filename): file_lines = read_file_lines(filename) index, line = get_updatable_copyright_line(file_lines) if not line: print_file_action_message(filename, "No updatable copyright.") return last_git_change_year = get_most_recent_git_change_year(filename) new_line = create_updated_copyright_line(line, last_git_change_year) if line == new_line: print_file_action_message(filename, "Copyright up-to-date.") return file_lines[index] = new_line write_file_lines(filename, file_lines) print_file_action_message(filename, - "Copyright updated! -> %s" % last_git_change_year) + "Copyright updated! -> {}".format( + last_git_change_year)) def exec_update_header_year(base_directory): original_cwd = os.getcwd() os.chdir(base_directory) for filename in get_filenames_to_examine(): update_updatable_copyright(filename) os.chdir(original_cwd) ################################################################################ # update cmd ################################################################################ UPDATE_USAGE = """ Updates all the copyright headers of "The Bitcoin developers" which were changed in a year more recent than is listed. For example: // Copyright (c) - The Bitcoin developers will be updated to: // Copyright (c) - The Bitcoin developers where is obtained from the 'git log' history. This subcommand also handles copyright headers that have only a single year. In those cases: // Copyright (c) The Bitcoin developers will be updated to: // Copyright (c) - The Bitcoin developers where the update is appropriate. Usage: $ ./copyright_header.py update Arguments: - The base directory of a bitcoin source code repository. """ def print_file_action_message(filename, action): - print("%-52s %s" % (filename, action)) + print("{:52s} {}".format(filename, action)) def update_cmd(argv): if len(argv) != 3: sys.exit(UPDATE_USAGE) base_directory = argv[2] if not os.path.exists(base_directory): - sys.exit("*** bad base_directory: %s" % base_directory) + sys.exit("*** bad base_directory: {}".format(base_directory)) exec_update_header_year(base_directory) ################################################################################ # inserted copyright header format ################################################################################ def get_header_lines(header, start_year, end_year): lines = header.split('\n')[1:-1] - lines[0] = lines[0] % year_range_to_str(start_year, end_year) + lines[0] = lines[0].format(year_range_to_str(start_year, end_year)) return [line + '\n' for line in lines] CPP_HEADER = ''' -// Copyright (c) %s The Bitcoin developers +// Copyright (c) {} The Bitcoin developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. ''' def get_cpp_header_lines_to_insert(start_year, end_year): return reversed(get_header_lines(CPP_HEADER, start_year, end_year)) PYTHON_HEADER = ''' -# Copyright (c) %s The Bitcoin developers +# Copyright (c) {} The Bitcoin developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. ''' def get_python_header_lines_to_insert(start_year, end_year): return reversed(get_header_lines(PYTHON_HEADER, start_year, end_year)) ################################################################################ # query git for year of last change ################################################################################ def get_git_change_year_range(filename): years = get_git_change_years(filename) return min(years), max(years) ################################################################################ # check for existing core copyright ################################################################################ def file_already_has_bitcoin_copyright(file_lines): index, _ = get_updatable_copyright_line(file_lines) return index != None ################################################################################ # insert header execution ################################################################################ def file_has_hashbang(file_lines): if len(file_lines) < 1: return False if len(file_lines[0]) <= 2: return False return file_lines[0][:2] == '#!' def insert_python_header(filename, file_lines, start_year, end_year): if file_has_hashbang(file_lines): insert_idx = 1 else: insert_idx = 0 header_lines = get_python_header_lines_to_insert(start_year, end_year) for line in header_lines: file_lines.insert(insert_idx, line) write_file_lines(filename, file_lines) def insert_cpp_header(filename, file_lines, start_year, end_year): header_lines = get_cpp_header_lines_to_insert(start_year, end_year) for line in header_lines: file_lines.insert(0, line) write_file_lines(filename, file_lines) def exec_insert_header(filename, style): file_lines = read_file_lines(filename) if file_already_has_bitcoin_copyright(file_lines): - sys.exit('*** %s already has a copyright by The Bitcoin developers' - % (filename)) + sys.exit('*** {} already has a copyright by The Bitcoin developers'.format( + filename)) start_year, end_year = get_git_change_year_range(filename) if style == 'python': insert_python_header(filename, file_lines, start_year, end_year) else: insert_cpp_header(filename, file_lines, start_year, end_year) ################################################################################ # insert cmd ################################################################################ INSERT_USAGE = """ Inserts a copyright header for "The Bitcoin developers" at the top of the file in either Python or C++ style as determined by the file extension. If the file is a Python file and it has a '#!' starting the first line, the header is inserted in the line below it. The copyright dates will be set to be: "-" where is according to the 'git log' history. If is equal to , the date will be set to be: "" If the file already has a copyright for "The Bitcoin developers", the script will exit. Usage: $ ./copyright_header.py insert Arguments: - A source file in the bitcoin repository. """ def insert_cmd(argv): if len(argv) != 3: sys.exit(INSERT_USAGE) filename = argv[2] if not os.path.isfile(filename): - sys.exit("*** bad filename: %s" % filename) + sys.exit("*** bad filename: {}".format(filename)) _, extension = os.path.splitext(filename) if extension not in ['.h', '.cpp', '.cc', '.c', '.py']: - sys.exit("*** cannot insert for file extension %s" % extension) + sys.exit("*** cannot insert for file extension {}".format(extension)) if extension == '.py': style = 'python' else: style = 'cpp' exec_insert_header(filename, style) ################################################################################ # UI ################################################################################ USAGE = """ copyright_header.py - utilities for managing copyright headers of 'The Bitcoin developers' in repository source files. Usage: $ ./copyright_header Subcommands: report update insert To see subcommand usage, run them without arguments. """ SUBCOMMANDS = ['report', 'update', 'insert'] if __name__ == "__main__": if len(sys.argv) == 1: sys.exit(USAGE) subcommand = sys.argv[1] if subcommand not in SUBCOMMANDS: sys.exit(USAGE) if subcommand == 'report': report_cmd(sys.argv) elif subcommand == 'update': update_cmd(sys.argv) elif subcommand == 'insert': insert_cmd(sys.argv) diff --git a/contrib/devtools/security-check.py b/contrib/devtools/security-check.py index fd53f4aaf..b25deb45e 100755 --- a/contrib/devtools/security-check.py +++ b/contrib/devtools/security-check.py @@ -1,235 +1,235 @@ #!/usr/bin/env python # Copyright (c) 2015-2016 The Bitcoin Core developers # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. ''' Perform basic ELF security checks on a series of executables. Exit status will be 0 if successful, and the program will be silent. Otherwise the exit status will be 1 and it will log which executables failed which checks. Needs `readelf` (for ELF) and `objdump` (for PE). ''' from __future__ import division, print_function, unicode_literals import subprocess import sys import os READELF_CMD = os.getenv('READELF', '/usr/bin/readelf') OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump') # checks which are non-fatal for now but only generate a warning NONFATAL = {'HIGH_ENTROPY_VA'} def check_ELF_PIE(executable): ''' Check for position independent executable (PIE), allowing for address space randomization. ''' p = subprocess.Popen([READELF_CMD, '-h', '-W', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') ok = False for line in stdout.splitlines(): line = line.split() if len(line) >= 2 and line[0] == 'Type:' and line[1] == 'DYN': ok = True return ok def get_ELF_program_headers(executable): '''Return type and flags for ELF program headers''' p = subprocess.Popen([READELF_CMD, '-l', '-W', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') in_headers = False count = 0 headers = [] for line in stdout.splitlines(): if line.startswith('Program Headers:'): in_headers = True if line == '': in_headers = False if in_headers: if count == 1: # header line ofs_typ = line.find('Type') ofs_offset = line.find('Offset') ofs_flags = line.find('Flg') ofs_align = line.find('Align') if ofs_typ == -1 or ofs_offset == -1 or ofs_flags == -1 or ofs_align == -1: raise ValueError('Cannot parse elfread -lW output') elif count > 1: typ = line[ofs_typ:ofs_offset].rstrip() flags = line[ofs_flags:ofs_align].rstrip() headers.append((typ, flags)) count += 1 return headers def check_ELF_NX(executable): ''' Check that no sections are writable and executable (including the stack) ''' have_wx = False have_gnu_stack = False for (typ, flags) in get_ELF_program_headers(executable): if typ == 'GNU_STACK': have_gnu_stack = True if 'W' in flags and 'E' in flags: # section is both writable and executable have_wx = True return have_gnu_stack and not have_wx def check_ELF_RELRO(executable): ''' Check for read-only relocations. GNU_RELRO program header must exist Dynamic section must have BIND_NOW flag ''' have_gnu_relro = False for (typ, flags) in get_ELF_program_headers(executable): # Note: not checking flags == 'R': here as linkers set the permission differently # This does not affect security: the permission flags of the GNU_RELRO program header are ignored, the PT_LOAD header determines the effective permissions. # However, the dynamic linker need to write to this area so these are RW. # Glibc itself takes care of mprotecting this area R after relocations are finished. # See also http://permalink.gmane.org/gmane.comp.gnu.binutils/71347 if typ == 'GNU_RELRO': have_gnu_relro = True have_bindnow = False p = subprocess.Popen([READELF_CMD, '-d', '-W', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') for line in stdout.splitlines(): tokens = line.split() if len(tokens) > 1 and tokens[1] == '(BIND_NOW)' or (len(tokens) > 2 and tokens[1] == '(FLAGS)' and 'BIND_NOW' in tokens[2]): have_bindnow = True return have_gnu_relro and have_bindnow def check_ELF_Canary(executable): ''' Check for use of stack canary ''' p = subprocess.Popen([READELF_CMD, '--dyn-syms', '-W', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') ok = False for line in stdout.splitlines(): if '__stack_chk_fail' in line: ok = True return ok def get_PE_dll_characteristics(executable): ''' Get PE DllCharacteristics bits. Returns a tuple (arch,bits) where arch is 'i386:x86-64' or 'i386' and bits is the DllCharacteristics value. ''' p = subprocess.Popen([OBJDUMP_CMD, '-x', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') arch = '' bits = 0 for line in stdout.splitlines(): tokens = line.split() if len(tokens) >= 2 and tokens[0] == 'architecture:': arch = tokens[1].rstrip(',') if len(tokens) >= 2 and tokens[0] == 'DllCharacteristics': bits = int(tokens[1], 16) return (arch, bits) IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020 IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040 IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100 def check_PE_DYNAMIC_BASE(executable): '''PIE: DllCharacteristics bit 0x40 signifies dynamicbase (ASLR)''' (arch, bits) = get_PE_dll_characteristics(executable) reqbits = IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE return (bits & reqbits) == reqbits # On 64 bit, must support high-entropy 64-bit address space layout randomization in addition to DYNAMIC_BASE # to have secure ASLR. def check_PE_HIGH_ENTROPY_VA(executable): '''PIE: DllCharacteristics bit 0x20 signifies high-entropy ASLR''' (arch, bits) = get_PE_dll_characteristics(executable) if arch == 'i386:x86-64': reqbits = IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA else: # Unnecessary on 32-bit assert(arch == 'i386') reqbits = 0 return (bits & reqbits) == reqbits def check_PE_NX(executable): '''NX: DllCharacteristics bit 0x100 signifies nxcompat (DEP)''' (arch, bits) = get_PE_dll_characteristics(executable) return (bits & IMAGE_DLL_CHARACTERISTICS_NX_COMPAT) == IMAGE_DLL_CHARACTERISTICS_NX_COMPAT CHECKS = { 'ELF': [ ('PIE', check_ELF_PIE), ('NX', check_ELF_NX), ('RELRO', check_ELF_RELRO), ('Canary', check_ELF_Canary) ], 'PE': [ ('DYNAMIC_BASE', check_PE_DYNAMIC_BASE), ('HIGH_ENTROPY_VA', check_PE_HIGH_ENTROPY_VA), ('NX', check_PE_NX) ] } def identify_executable(executable): with open(filename, 'rb') as f: magic = f.read(4) if magic.startswith(b'MZ'): return 'PE' elif magic.startswith(b'\x7fELF'): return 'ELF' return None if __name__ == '__main__': retval = 0 for filename in sys.argv[1:]: try: etype = identify_executable(filename) if etype is None: - print('%s: unknown format' % filename) + print('{}: unknown format'.format(filename)) retval = 1 continue failed = [] warning = [] for (name, func) in CHECKS[etype]: if not func(filename): if name in NONFATAL: warning.append(name) else: failed.append(name) if failed: - print('%s: failed %s' % (filename, ' '.join(failed))) + print('{}: failed {}'.format(filename, ' '.join(failed))) retval = 1 if warning: - print('%s: warning %s' % (filename, ' '.join(warning))) + print('{}: warning {}'.format(filename, ' '.join(warning))) except IOError: - print('%s: cannot open' % filename) + print('{}: cannot open'.format(filename)) retval = 1 sys.exit(retval) diff --git a/contrib/devtools/symbol-check.py b/contrib/devtools/symbol-check.py index 1f196eb33..76e8cb4ec 100755 --- a/contrib/devtools/symbol-check.py +++ b/contrib/devtools/symbol-check.py @@ -1,179 +1,179 @@ #!/usr/bin/env python # Copyright (c) 2014 Wladimir J. van der Laan # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. ''' A script to check that the (Linux) executables produced by gitian only contain allowed gcc, glibc and libstdc++ version symbols. This makes sure they are still compatible with the minimum supported Linux distribution versions. Example usage: find ../gitian-builder/build -type f -executable | xargs python contrib/devtools/symbol-check.py ''' from __future__ import division, print_function, unicode_literals import subprocess import re import sys import os # Debian 8.11 (Jessie) has: # # - g++ version 4.9.2 (https://packages.debian.org/search?suite=default§ion=all&arch=any&searchon=names&keywords=g%2B%2B) # - libc version 2.19.18 (https://packages.debian.org/search?suite=default§ion=all&arch=any&searchon=names&keywords=libc6) # - libstdc++ version 4.8.4 (https://packages.debian.org/search?suite=default§ion=all&arch=any&searchon=names&keywords=libstdc%2B%2B6) # # Ubuntu 14.04 (Trusty Tahr) has: # # - g++ version 4.8.2 (https://packages.ubuntu.com/search?suite=trusty§ion=all&arch=any&keywords=g%2B%2B&searchon=names) # - libc version 2.19.0 (https://packages.ubuntu.com/search?suite=trusty§ion=all&arch=any&keywords=libc6&searchon=names) # - libstdc++ version 4.8.2 (https://packages.ubuntu.com/search?suite=trusty§ion=all&arch=any&keywords=libstdc%2B%2B&searchon=names) # # Taking the minimum of these as our target. # # According to GNU ABI document (http://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html) this corresponds to: # GCC 4.8.0: GCC_4.8.0 # GCC 4.8.0: GLIBCXX_3.4.18, CXXABI_1.3.7 # (glibc) GLIBC_2_19 # MAX_VERSIONS = { 'GCC': (4, 8, 0), 'CXXABI': (1, 3, 7), 'GLIBCXX': (3, 4, 18), 'GLIBC': (2, 19) } # See here for a description of _IO_stdin_used: # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=634261#109 # Ignore symbols that are exported as part of every executable IGNORE_EXPORTS = { '_edata', '_end', '_init', '__bss_start', '_fini', '_IO_stdin_used', 'stdin', 'stdout', 'stderr', # Figure out why we get these symbols exported on xenial. '_ZNKSt5ctypeIcE8do_widenEc', 'in6addr_any', 'optarg', '_ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE10_M_destroyEv' } READELF_CMD = os.getenv('READELF', '/usr/bin/readelf') CPPFILT_CMD = os.getenv('CPPFILT', '/usr/bin/c++filt') # Allowed NEEDED libraries ALLOWED_LIBRARIES = { # bitcoind and bitcoin-qt 'libgcc_s.so.1', # GCC base support 'libc.so.6', # C library 'libpthread.so.0', # threading 'libanl.so.1', # DNS resolve 'libm.so.6', # math library 'librt.so.1', # real-time (clock) 'ld-linux-x86-64.so.2', # 64-bit dynamic linker 'ld-linux.so.2', # 32-bit dynamic linker # bitcoin-qt only 'libX11-xcb.so.1', # part of X11 'libX11.so.6', # part of X11 'libxcb.so.1', # part of X11 'libfontconfig.so.1', # font support 'libfreetype.so.6', # font parsing 'libdl.so.2' # programming interface to dynamic linker } class CPPFilt(object): ''' Demangle C++ symbol names. Use a pipe to the 'c++filt' command. ''' def __init__(self): self.proc = subprocess.Popen( CPPFILT_CMD, stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True) def __call__(self, mangled): self.proc.stdin.write(mangled + '\n') self.proc.stdin.flush() return self.proc.stdout.readline().rstrip() def close(self): self.proc.stdin.close() self.proc.stdout.close() self.proc.wait() def read_symbols(executable, imports=True): ''' Parse an ELF executable and return a list of (symbol,version) tuples for dynamic, imported symbols. ''' p = subprocess.Popen([READELF_CMD, '--dyn-syms', '-W', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: - raise IOError('Could not read symbols for %s: %s' % - (executable, stderr.strip())) + raise IOError('Could not read symbols for {}: {}'.format( + executable, stderr.strip())) syms = [] for line in stdout.splitlines(): line = line.split() if len(line) > 7 and re.match('[0-9]+:$', line[0]): (sym, _, version) = line[7].partition('@') is_import = line[6] == 'UND' if version.startswith('@'): version = version[1:] if is_import == imports: syms.append((sym, version)) return syms def check_version(max_versions, version): if '_' in version: (lib, _, ver) = version.rpartition('_') else: lib = version ver = '0' ver = tuple([int(x) for x in ver.split('.')]) if not lib in max_versions: return False return ver <= max_versions[lib] def read_libraries(filename): p = subprocess.Popen([READELF_CMD, '-d', '-W', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) (stdout, stderr) = p.communicate() if p.returncode: raise IOError('Error opening file') libraries = [] for line in stdout.splitlines(): tokens = line.split() if len(tokens) > 2 and tokens[1] == '(NEEDED)': match = re.match( '^Shared library: \[(.*)\]$', ' '.join(tokens[2:])) if match: libraries.append(match.group(1)) else: raise ValueError('Unparseable (NEEDED) specification') return libraries if __name__ == '__main__': cppfilt = CPPFilt() retval = 0 for filename in sys.argv[1:]: # Check imported symbols for sym, version in read_symbols(filename, True): if version and not check_version(MAX_VERSIONS, version): - print('%s: symbol %s from unsupported version %s' % - (filename, cppfilt(sym), version)) + print('{}: symbol {} from unsupported version {}'.format( + filename, cppfilt(sym), version)) retval = 1 # Check exported symbols for sym, version in read_symbols(filename, False): if sym in IGNORE_EXPORTS: continue - print('%s: export of symbol %s not allowed' % - (filename, cppfilt(sym))) + print('{}: export of symbol {} not allowed'.format( + filename, cppfilt(sym))) retval = 1 # Check dependency libraries for library_name in read_libraries(filename): if library_name not in ALLOWED_LIBRARIES: - print('%s: NEEDED library %s is not allowed' % - (filename, library_name)) + print('{}: NEEDED library {} is not allowed'.format( + filename, library_name)) retval = 1 sys.exit(retval) diff --git a/contrib/devtools/update-translations.py b/contrib/devtools/update-translations.py index 435af49b1..887877f5d 100755 --- a/contrib/devtools/update-translations.py +++ b/contrib/devtools/update-translations.py @@ -1,229 +1,229 @@ #!/usr/bin/env python # Copyright (c) 2014 Wladimir J. van der Laan # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. ''' Run this script from the root of the repository to update all translations from transifex. It will do the following automatically: - fetch all translations using the tx tool - post-process them into valid and committable format - remove invalid control characters - remove location tags (makes diffs less noisy) TODO: - auto-add new translations to the build system according to the translation process ''' from __future__ import division, print_function import subprocess import re import sys import os import io import xml.etree.ElementTree as ET # Name of transifex tool TX = 'tx' # Name of source language file SOURCE_LANG = 'bitcoin_en.ts' # Directory with locale files LOCALE_DIR = 'src/qt/locale' # Minimum number of messages for translation to be considered at all MIN_NUM_MESSAGES = 10 def check_at_repository_root(): if not os.path.exists('.git'): print('No .git directory found') print('Execute this script at the root of the repository', file=sys.stderr) sys.exit(1) def fetch_all_translations(): if subprocess.call([TX, 'pull', '-f', '-a']): print('Error while fetching translations', file=sys.stderr) sys.exit(1) def find_format_specifiers(s): '''Find all format specifiers in a string.''' pos = 0 specifiers = [] while True: percent = s.find('%', pos) if percent < 0: break specifiers.append(s[percent + 1]) pos = percent + 2 return specifiers def split_format_specifiers(specifiers): '''Split format specifiers between numeric (Qt) and others (strprintf)''' numeric = [] other = [] for s in specifiers: if s in {'1', '2', '3', '4', '5', '6', '7', '8', '9'}: numeric.append(s) else: other.append(s) # If both numeric format specifiers and "others" are used, assume we're dealing # with a Qt-formatted message. In the case of Qt formatting (see https://doc.qt.io/qt-5/qstring.html#arg) # only numeric formats are replaced at all. This means "(percentage: %1%)" is valid, without needing # any kind of escaping that would be necessary for strprintf. Without this, this function # would wrongly detect '%)' as a printf format specifier. if numeric: other = [] # numeric (Qt) can be present in any order, others (strprintf) must be in specified order return set(numeric), other def sanitize_string(s): '''Sanitize string for printing''' return s.replace('\n', ' ') def check_format_specifiers(source, translation, errors, numerus): source_f = split_format_specifiers(find_format_specifiers(source)) # assert that no source messages contain both Qt and strprintf format specifiers # if this fails, go change the source as this is hacky and confusing! assert(not(source_f[0] and source_f[1])) try: translation_f = split_format_specifiers( find_format_specifiers(translation)) except IndexError: - errors.append("Parse error in translation for '%s': '%s'" % ( + errors.append("Parse error in translation for '{}': '{}'".format( sanitize_string(source), sanitize_string(translation))) return False else: if source_f != translation_f: if numerus and source_f == (set(), ['n']) and translation_f == (set(), []) and translation.find('%') == -1: # Allow numerus translations to omit %n specifier (usually when it only has one possible value) return True - errors.append("Mismatch between '%s' and '%s'" % ( + errors.append("Mismatch between '{}' and '{}'".format( sanitize_string(source), sanitize_string(translation))) return False return True def all_ts_files(suffix=''): for filename in os.listdir(LOCALE_DIR): # process only language files, and do not process source language if not filename.endswith('.ts' + suffix) or filename == SOURCE_LANG + suffix: continue if suffix: # remove provided suffix filename = filename[0:-len(suffix)] filepath = os.path.join(LOCALE_DIR, filename) yield(filename, filepath) FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]') def remove_invalid_characters(s): '''Remove invalid characters from translation string''' return FIX_RE.sub(b'', s) # Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for # comparison, disable by default) _orig_escape_cdata = None def escape_cdata(text): text = _orig_escape_cdata(text) text = text.replace("'", ''') text = text.replace('"', '"') return text def postprocess_translations(reduce_diff_hacks=False): print('Checking and postprocessing...') if reduce_diff_hacks: global _orig_escape_cdata _orig_escape_cdata = ET._escape_cdata ET._escape_cdata = escape_cdata for (filename, filepath) in all_ts_files(): os.rename(filepath, filepath + '.orig') have_errors = False for (filename, filepath) in all_ts_files('.orig'): # pre-fixups to cope with transifex output # need to override encoding because 'utf8' is not understood only 'utf-8' parser = ET.XMLParser(encoding='utf-8') with open(filepath + '.orig', 'rb') as f: data = f.read() # remove control characters; this must be done over the entire file otherwise the XML parser will fail data = remove_invalid_characters(data) tree = ET.parse(io.BytesIO(data), parser=parser) # iterate over all messages in file root = tree.getroot() for context in root.findall('context'): for message in context.findall('message'): numerus = message.get('numerus') == 'yes' source = message.find('source').text translation_node = message.find('translation') # pick all numerusforms if numerus: translations = [ i.text for i in translation_node.findall('numerusform')] else: translations = [translation_node.text] for translation in translations: if translation is None: continue errors = [] valid = check_format_specifiers( source, translation, errors, numerus) for error in errors: - print('%s: %s' % (filename, error)) + print('{}: {}'.format(filename, error)) if not valid: # set type to unfinished and clear string if invalid translation_node.clear() translation_node.set('type', 'unfinished') have_errors = True # Remove location tags for location in message.findall('location'): message.remove(location) # Remove entire message if it is an unfinished translation if translation_node.get('type') == 'unfinished': context.remove(message) # check if document is (virtually) empty, and remove it if so num_messages = 0 for context in root.findall('context'): for message in context.findall('message'): num_messages += 1 if num_messages < MIN_NUM_MESSAGES: - print('Removing %s, as it contains only %i messages' % - (filepath, num_messages)) + print('Removing {}, as it contains only {} messages'.format( + filepath, num_messages)) continue # write fixed-up tree # if diff reduction requested, replace some XML to 'sanitize' to qt formatting if reduce_diff_hacks: out = io.BytesIO() tree.write(out, encoding='utf-8') out = out.getvalue() out = out.replace(b' />', b'/>') with open(filepath, 'wb') as f: f.write(out) else: tree.write(filepath, encoding='utf-8') return have_errors if __name__ == '__main__': check_at_repository_root() fetch_all_translations() postprocess_translations()