diff --git a/.arclint b/.arclint --- a/.arclint +++ b/.arclint @@ -53,6 +53,13 @@ "(^arcanist/__phutil_library_.+\\.php$)" ], "phpcs.standard": "arcanist/linter/phpcs_ruleset.xml" + }, + "lint-locale-dependence": { + "type": "lint-locale-dependence", + "include": "(^src/.*\\.(h|cpp)$)", + "exclude": [ + "(^src/(crypto/ctaes/|leveldb/|secp256k1/|seeder/|tinyformat.h|univalue/))" + ] } } } diff --git a/arcanist/.phutil_module_cache b/arcanist/.phutil_module_cache --- a/arcanist/.phutil_module_cache +++ b/arcanist/.phutil_module_cache @@ -1 +1 @@ -{"__symbol_cache_version__":11,"90a8b110dc475955f15bb81d37268cb5":{"have":{"class":{"AutoPEP8FormatLinter":75}},"need":{"function":{"pht":297,"execx":769,"id":1903},"class":{"ArcanistExternalLinter":104,"ArcanistLintMessage":1910,"Filesystem":1754,"ArcanistLinter":2017,"ArcanistLintSeverity":2095}},"xmap":{"AutoPEP8FormatLinter":["ArcanistExternalLinter"]}},"bf0805c02029a7226e8c0d7dee039b3c":{"have":{"class":{"CheckDocLinter":106}},"need":{"function":{"pht":323,"id":1847},"class":{"ArcanistExternalLinter":129,"ArcanistLintMessage":1854,"Filesystem":731,"ArcanistLinter":1902,"ArcanistLintSeverity":1988}},"xmap":{"CheckDocLinter":["ArcanistExternalLinter"]}},"6af7410cfea496ff1d4dcc2624b6b8ea":{"have":{"class":{"ClangFormatLinter":79}},"need":{"function":{"pht":302,"execx":781,"id":1653},"class":{"ArcanistExternalLinter":105,"ArcanistLintMessage":1660,"Filesystem":1504,"ArcanistLinter":1767,"ArcanistLintSeverity":1845}},"xmap":{"ClangFormatLinter":["ArcanistExternalLinter"]}},"6f2f22dd0f259fb2eaa284b4fab3bc29":{"have":{"class":{"PythonFormatLinter":123}},"need":{"function":{"pht":353,"id":1838},"class":{"ArcanistExternalLinter":150,"ArcanistLintMessage":1845,"Filesystem":776,"ArcanistLinter":1970,"ArcanistLintSeverity":2053}},"xmap":{"PythonFormatLinter":["ArcanistExternalLinter"]}},"25781df78f6eebfb223296b8265e9d19":{"have":{"class":{"TestsLinter":103}},"need":{"function":{"pht":318,"id":2629},"class":{"ArcanistExternalLinter":123,"ArcanistLintMessage":2636,"Filesystem":776,"ArcanistLinter":2684,"ArcanistLintSeverity":2792}},"xmap":{"TestsLinter":["ArcanistExternalLinter"]}},"9285ad9415f8ebe564f7119e5a72c559":{"have":{"class":{"FormatStringLinter":146}},"need":{"function":{"pht":377,"csprintf":1492,"id":1872},"class":{"ArcanistExternalLinter":173,"ArcanistLintMessage":1879,"Filesystem":827,"ArcanistLinter":1956,"ArcanistLintSeverity":2044}},"xmap":{"FormatStringLinter":["ArcanistExternalLinter"]}}} \ No newline at end of file +{"__symbol_cache_version__":11,"90a8b110dc475955f15bb81d37268cb5":{"have":{"class":{"AutoPEP8FormatLinter":75}},"need":{"function":{"pht":297,"execx":769,"id":1903},"class":{"ArcanistExternalLinter":104,"ArcanistLintMessage":1910,"Filesystem":1754,"ArcanistLinter":2017,"ArcanistLintSeverity":2095}},"xmap":{"AutoPEP8FormatLinter":["ArcanistExternalLinter"]}},"bf0805c02029a7226e8c0d7dee039b3c":{"have":{"class":{"CheckDocLinter":106}},"need":{"function":{"pht":323,"id":1847},"class":{"ArcanistExternalLinter":129,"ArcanistLintMessage":1854,"Filesystem":731,"ArcanistLinter":1902,"ArcanistLintSeverity":1988}},"xmap":{"CheckDocLinter":["ArcanistExternalLinter"]}},"6af7410cfea496ff1d4dcc2624b6b8ea":{"have":{"class":{"ClangFormatLinter":79}},"need":{"function":{"pht":302,"execx":781,"id":1653},"class":{"ArcanistExternalLinter":105,"ArcanistLintMessage":1660,"Filesystem":1504,"ArcanistLinter":1767,"ArcanistLintSeverity":1845}},"xmap":{"ClangFormatLinter":["ArcanistExternalLinter"]}},"9285ad9415f8ebe564f7119e5a72c559":{"have":{"class":{"FormatStringLinter":146}},"need":{"function":{"pht":377,"csprintf":1492,"id":1872},"class":{"ArcanistExternalLinter":173,"ArcanistLintMessage":1879,"Filesystem":827,"ArcanistLinter":1956,"ArcanistLintSeverity":2044}},"xmap":{"FormatStringLinter":["ArcanistExternalLinter"]}},"6f2f22dd0f259fb2eaa284b4fab3bc29":{"have":{"class":{"PythonFormatLinter":123}},"need":{"function":{"pht":353,"id":1838},"class":{"ArcanistExternalLinter":150,"ArcanistLintMessage":1845,"Filesystem":776,"ArcanistLinter":1970,"ArcanistLintSeverity":2053}},"xmap":{"PythonFormatLinter":["ArcanistExternalLinter"]}},"25781df78f6eebfb223296b8265e9d19":{"have":{"class":{"TestsLinter":103}},"need":{"function":{"pht":318,"id":2629},"class":{"ArcanistExternalLinter":123,"ArcanistLintMessage":2636,"Filesystem":776,"ArcanistLinter":2684,"ArcanistLintSeverity":2792}},"xmap":{"TestsLinter":["ArcanistExternalLinter"]}},"224d394856b17878058b4c14acb7178b":{"have":{"class":{"LocaleDependenceLinter":160}},"need":{"function":{"pht":5400},"class":{"ArcanistLinter":191,"ArcanistLintSeverity":5903,"Filesystem":6149}},"xmap":{"LocaleDependenceLinter":["ArcanistLinter"]}}} \ No newline at end of file diff --git a/arcanist/__phutil_library_map__.php b/arcanist/__phutil_library_map__.php --- a/arcanist/__phutil_library_map__.php +++ b/arcanist/__phutil_library_map__.php @@ -13,6 +13,7 @@ 'CheckDocLinter' => 'linter/CheckDocLinter.php', 'ClangFormatLinter' => 'linter/ClangFormatLinter.php', 'FormatStringLinter' => 'linter/FormatStringLinter.php', + 'LocaleDependenceLinter' => 'linter/LocaleDependenceLinter.php', 'PythonFormatLinter' => 'linter/PythonFormatLinter.php', 'TestsLinter' => 'linter/TestsLinter.php', ), @@ -22,6 +23,7 @@ 'CheckDocLinter' => 'ArcanistExternalLinter', 'ClangFormatLinter' => 'ArcanistExternalLinter', 'FormatStringLinter' => 'ArcanistExternalLinter', + 'LocaleDependenceLinter' => 'ArcanistLinter', 'PythonFormatLinter' => 'ArcanistExternalLinter', 'TestsLinter' => 'ArcanistExternalLinter', ), diff --git a/arcanist/linter/LocaleDependenceLinter.php b/arcanist/linter/LocaleDependenceLinter.php new file mode 100644 --- /dev/null +++ b/arcanist/linter/LocaleDependenceLinter.php @@ -0,0 +1,283 @@ + [ + "stoul", + "trim_right", + "atoi", + ], + "src/core_read.cpp" => ["is_digit"], + "src/dbwrapper.cpp" => ["vsnprintf"], + "src/httprpc.cpp" => ["trim"], + "src/init.cpp" => ["atoi"], + "src/netbase.cpp" => ["to_lower"], + "src/qt/rpcconsole.cpp" => [ + "atoi", + "isdigit", + ], + "src/rest.cpp" => ["strtol"], + "src/rpc/server.cpp" => ["to_upper"], + "src/test/dbwrapper_tests.cpp" => ["snprintf"], + "src/test/getarg_tests.cpp" => [ + "split", + "is_space", + ], + "src/torcontrol.cpp" => ["atoi"], + "src/uint256.cpp" => ["tolower"], + "src/util.cpp" => ["atoi", "tolower"], + "src/utilmoneystr.cpp" => ["isdigit"], + "src/utilstrencodings.cpp" => [ + "atoi", + "strtol", + "strtoll", + "strtoul", + "strtoull", + ], + "src/utilstrencodings.h" => ["atoi"], + ); + + const LOCALE_DEPENDENT_FUNCTIONS = array( + "alphasort", // LC_COLLATE (via strcoll) + "asctime", // LC_TIME (directly) + "asprintf", // (via vasprintf) + "atof", // LC_NUMERIC (via strtod) + "atoi", // LC_NUMERIC (via strtol) + "atol", // LC_NUMERIC (via strtol) + "atoll", // (via strtoll) + "atoq", + "btowc", // LC_CTYPE (directly) + "ctime", // (via asctime or localtime) + "dprintf", // (via vdprintf) + "fgetwc", + "fgetws", + "fold_case", // boost::locale::fold_case + //"fprintf" // (via vfprintf) + "fputwc", + "fputws", + "fscanf", // (via __vfscanf) + "fwprintf", // (via __vfwprintf) + "getdate", // via __getdate_r => isspace // __localtime_r + "getwc", + "getwchar", + "is_digit", // boost::algorithm::is_digit + "is_space", // boost::algorithm::is_space + "isalnum", // LC_CTYPE + "isalpha", // LC_CTYPE + "isblank", // LC_CTYPE + "iscntrl", // LC_CTYPE + "isctype", // LC_CTYPE + "isdigit", // LC_CTYPE + "isgraph", // LC_CTYPE + "islower", // LC_CTYPE + "isprint", // LC_CTYPE + "ispunct", // LC_CTYPE + "isspace", // LC_CTYPE + "isupper", // LC_CTYPE + "iswalnum", // LC_CTYPE + "iswalpha", // LC_CTYPE + "iswblank", // LC_CTYPE + "iswcntrl", // LC_CTYPE + "iswctype", // LC_CTYPE + "iswdigit", // LC_CTYPE + "iswgraph", // LC_CTYPE + "iswlower", // LC_CTYPE + "iswprint", // LC_CTYPE + "iswpunct", // LC_CTYPE + "iswspace", // LC_CTYPE + "iswupper", // LC_CTYPE + "iswxdigit", // LC_CTYPE + "isxdigit", // LC_CTYPE + "localeconv", // LC_NUMERIC + LC_MONETARY + "mblen", // LC_CTYPE + "mbrlen", + "mbrtowc", + "mbsinit", + "mbsnrtowcs", + "mbsrtowcs", + "mbstowcs", // LC_CTYPE + "mbtowc", // LC_CTYPE + "mktime", + "normalize", // boost::locale::normalize + //"printf" // LC_NUMERIC + "putwc", + "putwchar", + "scanf", // LC_NUMERIC + "setlocale", + "snprintf", + "sprintf", + "sscanf", + "stod", + "stof", + "stoi", + "stol", + "stold", + "stoll", + "stoul", + "stoull", + "strcasecmp", + "strcasestr", + "strcoll", // LC_COLLATE + //"strerror" + "strfmon", + "strftime", // LC_TIME + "strncasecmp", + "strptime", + "strtod", // LC_NUMERIC + "strtof", + "strtoimax", + "strtol", // LC_NUMERIC + "strtold", + "strtoll", + "strtoq", + "strtoul", // LC_NUMERIC + "strtoull", + "strtoumax", + "strtouq", + "strxfrm", // LC_COLLATE + "swprintf", + "to_lower", // boost::locale::to_lower + "to_title", // boost::locale::to_title + "to_upper", // boost::locale::to_upper + "tolower", // LC_CTYPE + "toupper", // LC_CTYPE + "towctrans", + "towlower", // LC_CTYPE + "towupper", // LC_CTYPE + "trim", // boost::algorithm::trim + "trim_left", // boost::algorithm::trim_left + "trim_right", // boost::algorithm::trim_right + "ungetwc", + "vasprintf", + "vdprintf", + "versionsort", + "vfprintf", + "vfscanf", + "vfwprintf", + "vprintf", + "vscanf", + "vsnprintf", + "vsprintf", + "vsscanf", + "vswprintf", + "vwprintf", + "wcrtomb", + "wcscasecmp", + "wcscoll", // LC_COLLATE + "wcsftime", // LC_TIME + "wcsncasecmp", + "wcsnrtombs", + "wcsrtombs", + "wcstod", // LC_NUMERIC + "wcstof", + "wcstoimax", + "wcstol", // LC_NUMERIC + "wcstold", + "wcstoll", + "wcstombs", // LC_CTYPE + "wcstoul", // LC_NUMERIC + "wcstoull", + "wcstoumax", + "wcswidth", + "wcsxfrm", // LC_COLLATE + "wctob", + "wctomb", // LC_CTYPE + "wctrans", + "wctype", + "wcwidth", + "wprintf", + ); + + const LOCALE_DEPENDENCE_ERROR = 1; + + const ADVICE_MESSAGE = << ArcanistLintSeverity::SEVERITY_ERROR, + ); + } + + public function getLintNameMap() { + return array( + self::LOCALE_DEPENDENCE_ERROR => pht('Locale dependent function'), + ); + } + + public function lintPath($path) { + $absolutePath = Filesystem::resolvePath($path, $this->getProjectRoot()); + $fileContent = file($absolutePath, FILE_SKIP_EMPTY_LINES); + + // Flag if the path is part of the known exceptions + $exceptions = array(); + if (array_key_exists($path, self::KNOWN_VIOLATIONS)) { + $exceptions = self::KNOWN_VIOLATIONS[$path]; + } + + $anyFunction = implode("|", self::LOCALE_DEPENDENT_FUNCTIONS); + $pattern = "/[^\w`'\"<>](?P".$anyFunction."(_r|_s)?)[^\w`'\"<>]/"; + + foreach ($fileContent as $lineNumber => $lineContent) { + // Filter comments and string constants + if (preg_match("#^\s*(//|\*|/\*|\")#", $lineContent)) { + continue; + } + + /* + * Find the locale dependent functions occurrences. + * There can be multiple occurrences for a single line. + */ + if (preg_match_all($pattern, $lineContent, $matches, + PREG_OFFSET_CAPTURE)) { + foreach($matches["function"] as $function) { + list($functionName, $offset) = $function; + + // Filter known exceptions + if (!in_array($functionName, $exceptions)) { + $this->raiseLintAtLine( + $lineNumber + 1, + $offset + 1, + self::LOCALE_DEPENDENCE_ERROR, + self::ADVICE_MESSAGE, + $functionName); + } + } + } + } + } +} diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh deleted file mode 100755 --- a/test/lint/lint-locale-dependence.sh +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env bash - -KNOWN_VIOLATIONS=( - "src/bitcoin-tx.cpp.*stoul" - "src/bitcoin-tx.cpp.*trim_right" - "src/bitcoin-tx.cpp:.*atoi" - "src/core_read.cpp.*is_digit" - "src/dbwrapper.cpp:.*vsnprintf" - "src/httprpc.cpp.*trim" - "src/init.cpp:.*atoi" - "src/qt/rpcconsole.cpp:.*atoi" - "src/qt/rpcconsole.cpp:.*isdigit" - "src/rest.cpp:.*strtol" - "src/test/dbwrapper_tests.cpp:.*snprintf" - "src/test/getarg_tests.cpp.*split" - "src/torcontrol.cpp:.*atoi" - "src/uint256.cpp:.*tolower" - "src/util.cpp:.*atoi" - "src/util.cpp:.*tolower" - "src/utilmoneystr.cpp:.*isdigit" - "src/utilstrencodings.cpp:.*atoi" - # Append the opening parenthesis to avoid shadowing strtoll with grep - "src/utilstrencodings.cpp:.*strtol\(" - "src/utilstrencodings.cpp:.*strtoll" - # Append the opening parenthesis to avoid shadowing strtoull with grep - "src/utilstrencodings.cpp:.*strtoul\(" - "src/utilstrencodings.cpp:.*strtoull" - "src/utilstrencodings.h:.*atoi" -) - -REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="src/(crypto/ctaes/|leveldb/|secp256k1/|seeder/|tinyformat.h|univalue/)" - -LOCALE_DEPENDENT_FUNCTIONS=( - alphasort # LC_COLLATE (via strcoll) - asctime # LC_TIME (directly) - asprintf # (via vasprintf) - atof # LC_NUMERIC (via strtod) - atoi # LC_NUMERIC (via strtol) - atol # LC_NUMERIC (via strtol) - atoll # (via strtoll) - atoq - btowc # LC_CTYPE (directly) - ctime # (via asctime or localtime) - dprintf # (via vdprintf) - fgetwc - fgetws - fold_case # boost::locale::fold_case -# fprintf # (via vfprintf) - fputwc - fputws - fscanf # (via __vfscanf) - fwprintf # (via __vfwprintf) - getdate # via __getdate_r => isspace // __localtime_r - getwc - getwchar - is_digit # boost::algorithm::is_digit - is_space # boost::algorithm::is_space - isalnum # LC_CTYPE - isalpha # LC_CTYPE - isblank # LC_CTYPE - iscntrl # LC_CTYPE - isctype # LC_CTYPE - isdigit # LC_CTYPE - isgraph # LC_CTYPE - islower # LC_CTYPE - isprint # LC_CTYPE - ispunct # LC_CTYPE - isspace # LC_CTYPE - isupper # LC_CTYPE - iswalnum # LC_CTYPE - iswalpha # LC_CTYPE - iswblank # LC_CTYPE - iswcntrl # LC_CTYPE - iswctype # LC_CTYPE - iswdigit # LC_CTYPE - iswgraph # LC_CTYPE - iswlower # LC_CTYPE - iswprint # LC_CTYPE - iswpunct # LC_CTYPE - iswspace # LC_CTYPE - iswupper # LC_CTYPE - iswxdigit # LC_CTYPE - isxdigit # LC_CTYPE - localeconv # LC_NUMERIC + LC_MONETARY - mblen # LC_CTYPE - mbrlen - mbrtowc - mbsinit - mbsnrtowcs - mbsrtowcs - mbstowcs # LC_CTYPE - mbtowc # LC_CTYPE - mktime - normalize # boost::locale::normalize -# printf # LC_NUMERIC - putwc - putwchar - scanf # LC_NUMERIC - setlocale - snprintf - sprintf - sscanf - stod - stof - stoi - stol - stold - stoll - stoul - stoull - strcasecmp - strcasestr - strcoll # LC_COLLATE -# strerror - strfmon - strftime # LC_TIME - strncasecmp - strptime - strtod # LC_NUMERIC - strtof - strtoimax - strtol # LC_NUMERIC - strtold - strtoll - strtoq - strtoul # LC_NUMERIC - strtoull - strtoumax - strtouq - strxfrm # LC_COLLATE - swprintf - to_lower # boost::locale::to_lower - to_title # boost::locale::to_title - to_upper # boost::locale::to_upper - tolower # LC_CTYPE - toupper # LC_CTYPE - towctrans - towlower # LC_CTYPE - towupper # LC_CTYPE - trim # boost::algorithm::trim - trim_left # boost::algorithm::trim_left - trim_right # boost::algorithm::trim_right - ungetwc - vasprintf - vdprintf - versionsort - vfprintf - vfscanf - vfwprintf - vprintf - vscanf - vsnprintf - vsprintf - vsscanf - vswprintf - vwprintf - wcrtomb - wcscasecmp - wcscoll # LC_COLLATE - wcsftime # LC_TIME - wcsncasecmp - wcsnrtombs - wcsrtombs - wcstod # LC_NUMERIC - wcstof - wcstoimax - wcstol # LC_NUMERIC - wcstold - wcstoll - wcstombs # LC_CTYPE - wcstoul # LC_NUMERIC - wcstoull - wcstoumax - wcswidth - wcsxfrm # LC_COLLATE - wctob - wctomb # LC_CTYPE - wctrans - wctype - wcwidth - wprintf -) - -function join_array { - local IFS="$1" - shift - echo "$*" -} - -REGEXP_IGNORE_KNOWN_VIOLATIONS=$(join_array "|" "${KNOWN_VIOLATIONS[@]}") - -# Invoke "git grep" only once in order to minimize run-time -REGEXP_LOCALE_DEPENDENT_FUNCTIONS=$(join_array "|" "${LOCALE_DEPENDENT_FUNCTIONS[@]}") -GIT_GREP_OUTPUT=$(git grep -nE "[^a-zA-Z0-9_\`'\"<>](${REGEXP_LOCALE_DEPENDENT_FUNCTIONS}(_r|_s)?)[^a-zA-Z0-9_\`'\"<>]" -- ":/*.cpp" ":/*.h") - -EXIT_CODE=0 -for LOCALE_DEPENDENT_FUNCTION in "${LOCALE_DEPENDENT_FUNCTIONS[@]}"; do - MATCHES=$(grep -E "[^a-zA-Z0-9_\`'\"<>]${LOCALE_DEPENDENT_FUNCTION}(_r|_s)?[^a-zA-Z0-9_\`'\"<>]" <<< "${GIT_GREP_OUTPUT}" | \ - grep -vE "\.(c|cpp|h):[0-9]+:\s*(//|\*|/\*|\").*${LOCALE_DEPENDENT_FUNCTION}") - if [[ ${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES} != "" ]]; then - MATCHES=$(grep -vE "${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES}" <<< "${MATCHES}") - fi - if [[ ${REGEXP_IGNORE_KNOWN_VIOLATIONS} != "" ]]; then - MATCHES=$(grep -vE "${REGEXP_IGNORE_KNOWN_VIOLATIONS}" <<< "${MATCHES}") - fi - if [[ ${MATCHES} != "" ]]; then - echo "The locale dependent function ${LOCALE_DEPENDENT_FUNCTION}(...) appears to be used:" - echo "${MATCHES}" - echo - EXIT_CODE=1 - fi -done -if [[ ${EXIT_CODE} != 0 ]]; then - echo "Unnecessary locale dependence can cause bugs that are very" - echo "tricky to isolate and fix. Please avoid using locale dependent" - echo "functions if possible." - echo - echo "Advice not applicable in this specific case? Add an exception" - echo "by updating the ignore list in $0" -fi -exit ${EXIT_CODE}