diff --git a/.arclint b/.arclint --- a/.arclint +++ b/.arclint @@ -56,15 +56,6 @@ "type": "lint-tests", "include": "(^src/(seeder/|rpc/|wallet/)?test/.*\\.(cpp)$)" }, - "lint-python-format": { - "type": "lint-python-format", - "include": "(\\.py$)", - "exclude": [ - "(^test/lint/lint-python-format\\.py$)", - "(^contrib/gitian-builder/)", - "(^contrib/apple-sdk-tools/)" - ] - }, "phpcs": { "type": "phpcs", "include": "(\\.php$)", @@ -303,6 +294,15 @@ "type": "eslint", "version": ">=8.0.0", "include": "(web/cashtab/.*\\.js$)" + }, + "lint-python-flynt": { + "type": "lint-python-flynt", + "version": ">=0.78", + "include": "(\\.py$)", + "exclude": "(^contrib/)", + "flags": [ + "--transform-concats" + ] } } } diff --git a/arcanist/__phutil_library_map__.php b/arcanist/__phutil_library_map__.php --- a/arcanist/__phutil_library_map__.php +++ b/arcanist/__phutil_library_map__.php @@ -25,6 +25,7 @@ 'ESLinter' => 'linter/ESLinter.php', 'ExtendedConfigurationDrivenLintEngine' => 'engine/ExtendedConfigurationDrivenLintEngine.php', 'FileNameLinter' => 'linter/FileNameLinter.php', + 'FlyntFormattLinter' => 'linter/FlyntLinter.php', 'FormatStringLinter' => 'linter/FormatStringLinter.php', 'ISortFormatLinter' => 'linter/ISortLinter.php', 'IncludeGuardLinter' => 'linter/IncludeGuardLinter.php', @@ -37,7 +38,6 @@ 'MyPyLinter' => 'linter/MyPyLinter.php', 'PrettierLinter' => 'linter/PrettierLinter.php', 'PythonFileEncodingLinter' => 'linter/PythonFileEncodingLinter.php', - 'PythonFormatLinter' => 'linter/PythonFormatLinter.php', 'PythonMutableDefaultLinter' => 'linter/PythonMutableDefaultLinter.php', 'PythonShebangLinter' => 'linter/PythonShebangLinter.php', 'Qt5Linter' => 'linter/Qt5Linter.php', @@ -73,6 +73,7 @@ 'ESLinter' => 'ArcanistExternalLinter', 'ExtendedConfigurationDrivenLintEngine' => 'ArcanistLintEngine', 'FileNameLinter' => 'ArcanistLinter', + 'FlyntFormattLinter' => 'ArcanistExternalLinter', 'FormatStringLinter' => 'ArcanistExternalLinter', 'ISortFormatLinter' => 'ArcanistExternalLinter', 'IncludeGuardLinter' => 'ArcanistLinter', @@ -84,7 +85,6 @@ 'MyPyLinter' => 'ArcanistExternalLinter', 'PrettierLinter' => 'ArcanistExternalLinter', 'PythonFileEncodingLinter' => 'ArcanistLinter', - 'PythonFormatLinter' => 'ArcanistExternalLinter', 'PythonMutableDefaultLinter' => 'ArcanistLinter', 'PythonShebangLinter' => 'ArcanistLinter', 'Qt5Linter' => 'ArcanistLinter', diff --git a/arcanist/linter/FlyntLinter.php b/arcanist/linter/FlyntLinter.php new file mode 100644 --- /dev/null +++ b/arcanist/linter/FlyntLinter.php @@ -0,0 +1,139 @@ +<?php + +/** + * Flynt - string formatting converter + * + * Flynt is a command line tool to automatically convert a project's Python + * code from old "%-formatted" and .format(...) strings into Python 3.6+'s + * "f-strings". + * + * Limitations: + * - We don't set the --aggressive flag (see + * https://github.com/ikamensh/flynt#dangers-of-conversion) + * + * - It does not convert string with multiple use of a same variable. + * (this would work with the --aggressive flag) + * E.g.: + * '{addr.ip}:{addr.port}@'.format(addr=proxy1) + * "{0} blocks and {0} coinbase txes".format(num_blocks) + * + * - it does not convert string that use a variable as template string. + * This is good, as it can save us from repeating the template string. + * E.g: + * FORK_WARNING_MESSAGE.format(fork_block) + * + * - it does not convert strings that would result in lines longer than + * 88 characters. This could be changed by setting `--line-length 999` + * but then the result would be put on a single line and would have + * to be manually formatted. In some instances, especially when the + * arguments are longer than the template string, it may be better + * to keep the "...".format(...) string so that another linter can + * automatically format it. + * + * E.g.: + * dump_time_str = "# * Created on {}Z".format( + * datetime.datetime.fromtimestamp( + * dump_time, + * tz=datetime.timezone.utc, + * ) + * ) + */ +final class FlyntFormattLinter extends ArcanistExternalLinter { + + public function getInfoName() { + return 'flynt'; + } + + public function getInfoURI() { + return 'https://github.com/ikamensh/flynt'; + } + + public function getInfoDescription() { + return pht('Convert Python strings into f-strings.'); + } + + public function getLinterName() { + return 'flynt'; + } + + public function getLinterConfigurationName() { + return 'lint-python-flynt'; + } + + /** + * To make flynt write the output to stdout, the only solution as of + * version 0.78 (? current master branch) is to pass the input via stdin. + * We need to overload this to change the way the command is called. + * + * `cat inputfile.py | flynt -` + */ + protected function buildFutures(array $paths) { + $executable = $this->getExecutableCommand(); + + $bin = csprintf('%C %Ls -', $executable, $this->getCommandFlags()); + + $futures = array(); + foreach ($paths as $path) { + $disk_path = $this->getEngine()->getFilePathOnDisk($path); + $future = new ExecFuture('%C', $bin); + /* Write the input file to stdin */ + $input = file_get_contents($disk_path); + $future->write($input); + $future->setCWD($this->getProjectRoot()); + $futures[$path] = $future; + } + + return $futures; + } + + public function getDefaultBinary() { + return 'flynt'; + } + + protected function getMandatoryFlags() { + return array(); + } + + public function getInstallInstructions() { + return pht('Install flynt with `pip install flynt`'); + } + + public function shouldExpectCommandErrors() { + return false; + } + + protected function parseLinterOutput($path, $err, $stdout, $stderr) { + $ok = ($err == 0); + + if (!$ok) { + return false; + } + + $root = $this->getProjectRoot(); + $path = Filesystem::resolvePath($path, $root); + $orig = file_get_contents($path); + + /* the current master branch of flynt adds an extra newline to stdout */ + if (substr($stdout, -2, 2) == "\n\n") { + $stdout = substr($stdout, 0, -1); + } + + if ($orig == $stdout) { + return array(); + } + + $message = id(new ArcanistLintMessage()) + ->setPath($path) + ->setLine(1) + ->setChar(1) + ->setGranularity(ArcanistLinter::GRANULARITY_FILE) + ->setCode('FLYNT') + ->setSeverity(ArcanistLintSeverity::SEVERITY_AUTOFIX) + ->setName('Converting into f-strings') + ->setDescription("'$path' has strings to be converted into f-strings.") + ->setOriginalText($orig) + ->setReplacementText($stdout); + + return array($message); + } +} diff --git a/arcanist/linter/PythonFormatLinter.php b/arcanist/linter/PythonFormatLinter.php deleted file mode 100644 --- a/arcanist/linter/PythonFormatLinter.php +++ /dev/null @@ -1,94 +0,0 @@ -<?php - -/** - * Uses the lint-python-format.py script to update from old % format style to - * new .format(). - */ -final class PythonFormatLinter extends ArcanistExternalLinter { - - public function getInfoName() { - return 'lint-python-format'; - } - - public function getInfoURI() { - return ''; - } - - public function getInfoDescription() { - return pht('Convert python string formatting from %% to .format().'); - } - - public function getLinterName() { - return 'lint-python-format'; - } - - public function getLinterConfigurationName() { - return 'lint-python-format'; - } - - public function getLinterConfigurationOptions() { - $options = array(); - return $options + parent::getLinterConfigurationOptions(); - } - - public function getDefaultBinary() { - return Filesystem::resolvePath('test/lint/lint-python-format.py', - $this->getProjectRoot()); - } - - public function shouldUseInterpreter() { - return true; - } - - public function getDefaultInterpreter() { - return "python3"; - } - - public function getInstallInstructions() { - return pht('The test/lint/lint-python-format.py script is part of the '. - 'bitcoin-abc project'); - } - - public function shouldExpectCommandErrors() { - return false; - } - - protected function getMandatoryFlags() { - return array(); - } - - protected function parseLinterOutput($path, $err, $stdout, $stderr) { - $pattern = '/\((\d+)\) ([\s\S]+?)=> (.+)/'; - $found = preg_match_all($pattern, $stdout, $snippets, - $flags = PREG_SET_ORDER); - - /* - * Matched snippets $snippets are organized like this: - * [0] The complete mask - * [1] The line number - * [2] The original snippet - * [3] The replacement snippet - */ - - if (!$found) { - return array(); - } - - $messages = []; - foreach($snippets as $snippet) { - $messages[] = id(new ArcanistLintMessage()) - ->setPath($path) - ->setLine($snippet[1]) - ->setChar(1) - ->setGranularity(ArcanistLinter::GRANULARITY_FILE) - ->setCode('PYFMT') - ->setSeverity(ArcanistLintSeverity::SEVERITY_AUTOFIX) - ->setName('Old string format notation') - ->setDescription("'$path' uses old style string formatting.") - ->setOriginalText(rtrim($snippet[2])) - ->setReplacementText($snippet[3]); - } - - return $messages; - } -} diff --git a/test/lint/lint-python-format-tests.txt b/test/lint/lint-python-format-tests.txt deleted file mode 100644 --- a/test/lint/lint-python-format-tests.txt +++ /dev/null @@ -1,80 +0,0 @@ -# This file contains python code with % string formatters to test the -# lint-python-format.py script. - -# Single line -"test %s" % "string" -"pi %.2f" % 3.1415 - -# Multiple lines -"test %s" % - "string" -"test %s" % \ - "string" -"test %s" \ - % "string" -"test %s %s %s" \ - % ("1", "2", "3") -"test %s %s %s" % \ - ("1", "2", "3") -"test %s %s %s" \ - % ("1", - "2", "3") -"test %s %s %s" \ - % ("0" \ - + "1", - "2", "3") - -# In a comment -# "test %s" % "string" - -# Nested comment -"test %s %s %s" \ - % ("1", - #"4", - "2", "3") - -# Inlined comments are not supported -# "test %s %s %s" \ -# % ("1", #4, -# "2", "3") - -# Nested format inside a list, dict or function -["test %s" % "string"] -{"key1":"%s" % "value1", "key2":"value2"} -f("%d" % len("string"), argument2) -f("%d %s" % (len("string"), "argument1"), argument2) -["test %s %s" % - ("string1", "string2")] - -# Multiple formats on a single line -("%s" % "string1", "%s" % "string2") -("%s" % "string1", "%s %s" % ("string2", "string3") -("%s %s" % ("string1", "string2"), "%s %s" % ("string3", "string4")) - -# Combo ! -["test %05i %% %s" % - (len("string1"), - "%d %-10s %%" % (len("string2"), - "string2"))] - -# Code with % but not using it for formatting should not throw any linter error -"test %100" -"test %\n" -"test %%" -"test %d" 42 -"test % 42" -"test %\ - 1" -"test % \ - 1" -a = 10 % 2 - -# An array -"test %s" % an_array[0] -# A matrix -"test %s" % an_array[0][0] -# An array in an array -["test %s" % an_array[0]] -# An matrix in an array in a dict -{"test":" ["test %s" % an_array[0][0]]} - diff --git a/test/lint/lint-python-format.py b/test/lint/lint-python-format.py deleted file mode 100755 --- a/test/lint/lint-python-format.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2019 The Bitcoin developers -# Distributed under the MIT software license, see the accompanying -# file COPYING or http://www.opensource.org/licenses/mit-license.php. -# -# Lint python format : This program checks that the old python fomatting method -# is not being used (formatting with "string %s" % content). -# The new "{}".format(content) or f"{} content" method should be used instead. -# Usage of the % formatter is expected to be deprecated by python in the -# future. - -import re -import sys -from doctest import testmod - - -def is_complete(snippet): - r"""Check if a code snippet is complete. - - >>> is_complete("a = [1, 2, 3]") - True - >>> is_complete("a = [1,") - False - >>> is_complete("a = [1, (") - False - >>> is_complete("a = [") - False - >>> is_complete("a = [1, {") - False - >>> is_complete('a = [1, 2, "%d" % \\') - False - >>> is_complete('a = [1, 2, \"\"\"') - False - >>> is_complete('"%s" %') - False - """ - can_continue = [',', '(', '[', '{', '\\', '"""', '%'] - return not any(snippet.strip().endswith(end) for end in can_continue) - - -def build_replacement(error): - r"""Replace a snippet using the % formatter with a version using .format(). - - >>> build_replacement('"test %s" % "1"') - '"test {}".format("1")' - >>> build_replacement('"test %s" % ("1")') - '"test {}".format("1")' - >>> build_replacement('"test %.2f" % 3.1415') - '"test {:.2f}".format(3.1415)' - >>> build_replacement('"test %s" \\\n% "1"') - '"test {}".format("1")' - >>> build_replacement('"test %s" %\\\n"1"') - '"test {}".format("1")' - >>> build_replacement('"test %s %s %s" % ("1", "2", "3")') - '"test {} {} {}".format("1", "2", "3")' - >>> build_replacement('"test %s %s %s" % \\\n("1", "2", "3")') - '"test {} {} {}".format("1", "2", "3")' - >>> build_replacement('"test %s %s %s" % \\\n("1",\n"2", "3")') - '"test {} {} {}".format("1", "2", "3")' - >>> build_replacement('"test %d" % 1') - '"test {}".format(1)' - >>> build_replacement('"test %i" % 1') - '"test {}".format(1)' - >>> build_replacement('"test %r" % "1"') - '"test {!r}".format("1")' - >>> build_replacement('"test %-10s" % "1"') - '"test {:10s}".format("1")' - >>> build_replacement('"test %s.%s" % ("1", "2")') - '"test {}.{}".format("1", "2")' - >>> build_replacement('"test %% %s %s" % ("1", "2")') - '"test % {} {}".format("1", "2")' - >>> build_replacement('"test %s %% %s" % ("1", "2")') - '"test {} % {}".format("1", "2")' - >>> build_replacement('"test %s %s %%" % ("1", "2")') - '"test {} {} %".format("1", "2")' - >>> build_replacement('"test %%%s%%%s%%" % ("1", "2")') - '"test %{}%{}%".format("1", "2")' - """ - # Inline the error snippet. - # Replace line continuation ('\'), line breaks and their surrounding - # spaces and indentation to a single space character - replacement = re.sub(r"\s*\\\s+", " ", error, re.MULTILINE) - replacement = re.sub(r"\s*(?:\r?\n|\r(?!\n))\s*", - " ", replacement, re.MULTILINE) - - # Escape the %% in 2 passes to avoid the % character to mess up with the - # regexes - # First change %% to \xec\xec then back \xec\xec to % (\xec is the infinity - # symbol in extended ascii, it is unlikely to encounter it twice) - replacement = re.sub(r"%%", "\xec\xec", replacement, re.MULTILINE) - - # Replace the specifiers, retaining their content. - # E.g. %.2f => {:.2f} - def specifier_sub(match): - # There are some special cases to handle: - # - {:s} only works with strings, but %s worked with almost anything. - # To avoid type errors, just use {} - # - {:i} does not exists, it should be {:d} or better {}. - # - {:r} is invalid, the new syntax is {!r} - # - The left alignement marker (e.g. %-5s) is now the default, remove it - specifier = match.group(1) - specifier_converts_to_empty_brackets = ["s", "i", "d"] - if specifier in specifier_converts_to_empty_brackets: - return "{}" - elif specifier == "r": - return "{!r}" - elif specifier.startswith("-"): - return f"{{:{specifier[1:]}}}" - specifier = specifier.replace("i", "d") - return f"{{:{specifier}}}" - - (replacement, count) = re.subn( - r"%([.-]?[0-9]*[a-zA-Z])", specifier_sub, replacement, flags=re.MULTILINE) - - # Replace the qualifier. - # E.g % 42 => .format(42) - # E.g. % (42, "my_string") => .format(42, "my_string") - def single_qualifier_sub(match): - qualifier = f".format({match.group(1).strip()}" - # Where to close the parenthesis if there is a single specifier ? - # It is whether at the end or before the first ',', ']', '}' (if - # enclosed in a function call, a list or a dictionary). - # - # There is a special case to be handled when the qualifier is an array. - # In this case, ensure there is one more ']' than '['. - close_before = [",", "]", "}"] - opening_count = 0 - for i, c in enumerate(qualifier): - if c == "[": - opening_count += 1 - if c in close_before: - if c == "]" and opening_count > 0: - opening_count -= 1 - continue - return f"{qualifier[:i]}){qualifier[i:]}" - return f"{qualifier})" - - def multi_qualifier_sub(match): - # The closing parenthesis is already there as we are replacing a tuple - qualifier = f".format({match.group(1).strip()}" - return qualifier - - # There are 2 possible way to write the qualifier: - # - If there is a single qualifier, it can be set directly. - # E.g.: "%s" % "string" - # - It can always be set as a tuple: - # E.g.: "%s" % ("string") - # E.g.: "%s %s" % ("string1", "string2") - # - # Solution: try to find the pattern with the opening parenthesis first, then - # fall back to no parenthesis. - replacement = re.sub(r"\s*(?<!%)%\s+\(([^%]+)", multi_qualifier_sub, - replacement, flags=re.MULTILINE) - replacement = re.sub(r"\s*(?<!%)%\s+([^%]+)", single_qualifier_sub, replacement, - flags=re.MULTILINE) - - # Second pass of %% escaping, replace \xec\xec with % - replacement = re.sub(r"\xec\xec", "%", replacement, re.MULTILINE) - - return replacement - - -def find_snippets(file): - """Find code snippets in the source file that contains the percent ('%') - character""" - with open(file, 'r', encoding='utf-8') as f: - snippet_line = "" - snippets = {} - - for line_number, line in enumerate(f): - # Skip comments - if not line.strip().startswith('#'): - # If we are not already in a snippet and the line contains a % - # character, start saving the snippet - if not snippet_line and '%' in line: - snippet_line = str(line_number + 1) - snippets[snippet_line] = "" - - # In a snippet ? - # - save the line - # - check if the snippet is complete - if snippet_line: - snippets[snippet_line] += line - if is_complete(line): - snippet_line = "" - - return snippets - - -def find_errors(file): - """Extract snippets using the % symbol as a formatter with their line - number""" - pattern = re.compile(r"(?:\"|')\s*\\?\s+%\s+(?:\\\s+)?.+$", re.MULTILINE) - snippets = find_snippets(file) - return dict( - [(line, snippet) for line, snippet in snippets.items() if pattern.search(snippet) is not None]) - - -def main(file): - r"""Print line number and code snippets using the % formatter from the file, - and suggest a replacement using the .format() method. - Output format is : - (<line number>) <original snippet> - => <replacement snippet> - - >>> main("test/lint/lint-python-format-tests.txt") - (5) "test %s" % "string" - => "test {}".format("string") - (6) "pi %.2f" % 3.1415 - => "pi {:.2f}".format(3.1415) - (9) "test %s" % - "string" - => "test {}".format("string") - (11) "test %s" % \ - "string" - => "test {}".format("string") - (13) "test %s" \ - % "string" - => "test {}".format("string") - (15) "test %s %s %s" \ - % ("1", "2", "3") - => "test {} {} {}".format("1", "2", "3") - (17) "test %s %s %s" % \ - ("1", "2", "3") - => "test {} {} {}".format("1", "2", "3") - (19) "test %s %s %s" \ - % ("1", - "2", "3") - => "test {} {} {}".format("1", "2", "3") - (22) "test %s %s %s" \ - % ("0" \ - + "1", - "2", "3") - => "test {} {} {}".format("0" + "1", "2", "3") - (31) "test %s %s %s" \ - % ("1", - "2", "3") - => "test {} {} {}".format("1", "2", "3") - (42) ["test %s" % "string"] - => ["test {}".format("string")] - (43) {"key1":"%s" % "value1", "key2":"value2"} - => {"key1":"{}".format("value1"), "key2":"value2"} - (44) f("%d" % len("string"), argument2) - => f("{}".format(len("string")), argument2) - (45) f("%d %s" % (len("string"), "argument1"), argument2) - => f("{} {}".format(len("string"), "argument1"), argument2) - (46) ["test %s %s" % - ("string1", "string2")] - => ["test {} {}".format("string1", "string2")] - (50) ("%s" % "string1", "%s" % "string2") - => ("{}".format("string1"), "{}".format("string2")) - (51) ("%s" % "string1", "%s %s" % ("string2", "string3") - => ("{}".format("string1"), "{} {}".format("string2", "string3") - (52) ("%s %s" % ("string1", "string2"), "%s %s" % ("string3", "string4")) - => ("{} {}".format("string1", "string2"), "{} {}".format("string3", "string4")) - (55) ["test %05i %% %s" % - (len("string1"), - "%d %-10s %%" % (len("string2"), - "string2"))] - => ["test {:05d} % {}".format(len("string1"), "{} {:10s} %".format(len("string2"), "string2"))] - (73) "test %s" % an_array[0] - => "test {}".format(an_array[0]) - (75) "test %s" % an_array[0][0] - => "test {}".format(an_array[0][0]) - (77) ["test %s" % an_array[0]] - => ["test {}".format(an_array[0])] - (79) {"test":" ["test %s" % an_array[0][0]]} - => {"test":" ["test {}".format(an_array[0][0])]} - """ - errors = find_errors(file) - # Python dictionnaries do not guarantee ordering, sort by line number - for line_number, error in sorted(errors.items(), - key=lambda pair: int(pair[0])): - replacement = build_replacement(error) - print(f"({line_number}) {error.rstrip()}") - print(f"=> {replacement}") - - -if __name__ == "__main__": - if len(sys.argv) != 2: - sys.exit(testmod()[1]) - else: - main(sys.argv[1])