2018-05-07 02:27:11 +00:00
#!/usr/bin/env bash
# For code formatting we have clang-format.
#
# But it is not sane to apply clang-format to the whole code base,
# because it sometimes makes properly formatted files worse.
#
# It is only reasonable to blindly apply clang-format in cases
# when the code is likely to be out of style.
#
# For this purpose we have a script that uses very primitive heuristics
# (simple regexps) to check if the code is likely to have basic style violations,
# so that the formatter can then be run only on the reported files.
# Top-level directory of the git work tree; the checks below build their paths from it.
ROOT_PATH="$(git rev-parse --show-toplevel)"

# Path fragments to skip. They are joined with '|' so that the whole value can be
# handed to `grep -vP` as a single alternation.
EXCLUDE_DIRS='build/'
EXCLUDE_DIRS+='|integration/'
EXCLUDE_DIRS+='|widechar_width/'
EXCLUDE_DIRS+='|glibc-compatibility/'
EXCLUDE_DIRS+='|memcpy/'
EXCLUDE_DIRS+='|consistent-hashing/'
EXCLUDE_DIRS+='|Parsers/New'
2018-05-07 02:27:11 +00:00
2020-04-05 23:51:26 +00:00
# Regexp heuristics: print every line of the C++ sources that looks badly formatted.
# Arguments given to this script are forwarded to the first grep (e.g. -n), each one
# as a separate word ("$@"). Paths are handed to xargs one per line, so a path that
# contains spaces is not split.
#
# The first pattern reports:
#   a curly brace not in a new line, but not for the case of C++11 init or agg. initialization
#   | trailing whitespace
#   | number of ws not a multiple of 4, but not in the case of comment continuation
#   | a tab character
#   | missing whitespace after if/for/while/catch/switch before the opening parenthesis
#   | whitespaces inside parentheses
#
# The second pattern then drops matches that are:
#   single-line comment | continuation of a multiline comment
#   | a typical piece of embedded shell code | something like ending of raw string literal
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep "$@" -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' |
    grep -v -P '(//|:\s+\*|\$\(\()| \)"'
2020-04-05 23:57:24 +00:00
# Tabs: print every source line that contains a literal tab character.
# Script arguments are forwarded to grep as separate words; paths go to xargs one per line.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep "$@" -F $'\t'
2019-06-13 10:45:32 +00:00
# // namespace comments are unneeded: print lines where a closing brace is followed by
# a `// namespace` comment. Script arguments are forwarded to grep as separate words.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep "$@" -P '}\s*//+\s*namespace\s*'
2019-08-23 15:10:33 +00:00
# Broken symlinks: with -L, find follows links, so whatever is still reported as
# type "l" could not be resolved. Paths containing "contrib" are ignored.
find -L "$ROOT_PATH" -type l 2>/dev/null | grep -v contrib && echo "^ Broken symlinks found"
2019-08-23 18:30:04 +00:00
# Double whitespaces: feed every source file to double-whitespaces.pl on stdin and
# print a marker line naming the file when the helper exits with a non-zero status.
# `IFS= read -r` and the quotes keep paths with spaces or backslashes intact.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    grep -vP "$EXCLUDE_DIRS" |
    while IFS= read -r i; do
        "$ROOT_PATH"/utils/check-style/double-whitespaces.pl < "$i" || echo -e "^ File $i contains double whitespaces\n"
    done
2020-02-25 14:35:37 +00:00
# Unused ErrorCodes
# NOTE: to fix automatically, replace echo with:
# sed -i "/extern const int $code/d" $file

# Print one message for every `extern const int NAME;` declaration in file $1
# whose NAME never appears in that same file as `ErrorCodes::NAME`.
report_unused_error_codes()
{
    local file=$1 code
    grep -P 'extern const int [_A-Z]+;' "$file" |
        sed -r -e 's/^.*extern const int ([_A-Z]+);.*$/\1/' |
        while IFS= read -r code; do
            # \b: a longer name that merely starts with $code (FOO vs. FOO_BAR) is not a use of $code.
            grep -q -P "ErrorCodes::$code\b" "$file" || echo "ErrorCode $code is defined but not used in file $file"
        done
}

# Only files that declare at least one error code are inspected.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -l -P 'extern const int [_A-Z]+' |
    while IFS= read -r file; do report_unused_error_codes "$file"; done
2020-02-25 14:35:37 +00:00
# Undefined ErrorCodes
# NOTE: to fix automatically, replace echo with:
# ( grep -q -F 'namespace ErrorCodes' $file && sed -i -r "0,/(\s*)extern const int [_A-Z]+/s//\1extern const int $code;\n&/" $file || awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace ErrorCodes\n{\n extern const int '$code';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )

# Print one message for every `ErrorCodes::NAME` occurrence in file $1 for which the
# same file has no `extern const int NAME` declaration.
report_undefined_error_codes()
{
    local file=$1 code
    # grep -o yields every occurrence, so all codes on a line are checked, not just the last one.
    grep -o -P 'ErrorCodes::[_A-Z]+' "$file" |
        sed -e 's/^ErrorCodes:://' |
        while IFS= read -r code; do
            grep -q "extern const int $code" "$file" || echo "ErrorCode $code is used in file $file but not defined"
        done
}

# Only files that mention at least one error code are inspected.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -l -P 'ErrorCodes::[_A-Z]+' |
    while IFS= read -r file; do report_undefined_error_codes "$file"; done
2020-02-25 14:35:37 +00:00
# Duplicate ErrorCodes: for every file that mentions an error code, count identical
# `extern const int NAME;` lines and print those whose count is not 1, followed by
# a message naming the file.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -l -P 'ErrorCodes::[_A-Z]+' |
    while IFS= read -r file; do
        grep -P 'extern const int [_A-Z]+;' "$file" | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate ErrorCode in file $file"
    done
2020-03-03 02:45:20 +00:00
# Three or more consecutive empty lines

# Print one message for each empty line of file $1 that is the third or later one in
# an unbroken run of empty lines. The file name comes from awk's own FILENAME, so it
# is never spliced into the awk program text.
report_consecutive_empty_lines()
{
    awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file " FILENAME } } /./ { i = 0 }' "$1"
}

find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    while IFS= read -r file; do report_consecutive_empty_lines "$file"; done
2020-03-07 03:28:03 +00:00
# Broken XML files (requires libxml2-utils): xmllint prints nothing for well-formed
# files (--noout) and is not allowed to fetch anything over the network (--nonet).
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.xml' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' xmllint --noout --nonet
2020-04-11 15:54:16 +00:00
2021-01-26 21:14:23 +00:00
# Run pylint with the repository's .pylintrc and without the score summary.
# FIXME: for now only clickhouse-test
pylint --rcfile="$ROOT_PATH/.pylintrc" --score=n "$ROOT_PATH/tests/clickhouse-test"
2021-01-26 21:14:23 +00:00
2021-02-21 07:45:05 +00:00
# Run yamllint with the repository's .yamllint config on every *.yaml / *.yml file
# outside contrib* and outside the excluded directories.
find "$ROOT_PATH" -not -path "$ROOT_PATH/contrib*" \( -name '*.yaml' -or -name '*.yml' \) -type f |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' yamllint --config-file="$ROOT_PATH/.yamllint"
2020-04-11 15:54:16 +00:00
# Machine translation to Russian is strictly prohibited:
# list every Russian doc page that carries the `machine_translated: true` marker.
find "$ROOT_PATH/docs/ru" -name '*.md' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -l -F 'machine_translated: true'
2020-06-20 06:28:19 +00:00
# Tests should not be named with "fail" in their names. It makes looking at the results less convenient.
# `grep .` prints the offending paths and succeeds only if there is at least one.
find "$ROOT_PATH/tests/queries" -iname '*fail*' |
    grep -vP "$EXCLUDE_DIRS" |
    grep . && echo 'Tests should not be named with "fail" in their names. It makes looking at the results less convenient when you search for "fail" substring in browser.'
2020-08-06 03:13:02 +00:00
2021-01-26 19:55:06 +00:00
# Queries to system.query_log/system.query_thread_log should have current_database = currentDatabase() condition
# NOTE: it is not that accurate, but at least something.
#
# mapfile stores one path per array element without word splitting or globbing, and
# `grep -l` prints bare file names, so nothing has to be cut out of "file:line" output.
mapfile -t tests_with_query_log < <(
    find "$ROOT_PATH/tests/queries" -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' |
        grep -vP "$EXCLUDE_DIRS" |
        xargs -d '\n' grep -l -e system.query_log -e system.query_thread_log | sort -u
)
for test_case in "${tests_with_query_log[@]}"; do
    grep -qE 'current_database.*currentDatabase' "$test_case" || echo "Queries to system.query_log/system.query_thread_log does not have current_database = currentDatabase() condition in $test_case"
done
2021-03-15 05:14:28 +00:00
# Queries with ReplicatedMergeTree
# NOTE: it is not that accurate, but at least something.
#
# mapfile stores one path per array element without word splitting or globbing, and
# `grep -l` prints bare file names, so nothing has to be cut out of "file:line" output.
mapfile -t tests_with_replicated_merge_tree < <(
    find "$ROOT_PATH/tests/queries" -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' |
        grep -vP "$EXCLUDE_DIRS" |
        xargs -d '\n' grep -l -e ReplicatedMergeTree | sort -u
)
for test_case in "${tests_with_replicated_merge_tree[@]}"; do
    case "$test_case" in
        *.sh)
            # The backslash keeps the dollar sign literal: the test must contain the variable name itself.
            test_case_zk_prefix="\$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX"
            grep -q -e "ReplicatedMergeTree.*$test_case_zk_prefix" "$test_case" || echo "ReplicatedMergeTree should contain '$test_case_zk_prefix' in zookeeper path to avoid overlaps ($test_case)"
            ;;
        *.sql)
            # NOTE: *.sql is not supported because it is not possible right now, because:
            # - ReplicatedMergeTree supports only ASTLiteral for zookeeper path
            #   (and adding support of other nodes, with evaluating them, is not that easy, because zk_prefix is "optional")
            # - Hence concat(currentDatabase(), 'foo') cannot be used
            # - Also params cannot be used, because they are wrapped with CAST()
            #
            # But hopefully they will not be a problem
            # (since they do not do any "stressing" and overlap probability should be lower).
            ;;
        *.py)
            # Right now there are no such tests anyway
            echo "No ReplicatedMergeTree style check for *.py ($test_case)"
            ;;
    esac
done
2020-08-06 03:13:02 +00:00
# All the submodules should be from https://github.com/
# For every .gitmodules file, print the `url = ` lines that do not contain that prefix.
find "$ROOT_PATH" -name '.gitmodules' |
    while IFS= read -r i; do
        grep -F 'url = ' "$i" | grep -v -F 'https://github.com/' && echo 'All the submodules should be from https://github.com/'
    done
2020-08-08 02:08:36 +00:00
# There shouldn't be any code snippets under GPL or LGPL
# (case-insensitive search for the literal phrase "General Public License").
find "$ROOT_PATH"/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null |
    xargs -d '\n' grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL"
2020-08-08 01:42:00 +00:00
2020-08-14 14:41:03 +00:00
# There shouldn't be any docker containers outside docker directory:
# print one message per Dockerfile found outside docker* and contrib*.
find "$ROOT_PATH" -not -path "$ROOT_PATH/docker*" -not -path "$ROOT_PATH/contrib*" -name Dockerfile -type f 2>/dev/null |
    xargs -d '\n' --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:"
# There shouldn't be any docker compose files outside docker directory
2020-08-15 04:23:23 +00:00
#find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name '*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:"
2020-08-19 09:40:28 +00:00
# Check that ya.make files are auto-generated: run the generator script here;
# the `git status` check further down then looks for ya.make among the changed files.
"${ROOT_PATH}/utils/generate-ya-make/generate-ya-make.sh"
2021-01-29 20:20:57 +00:00
# Print the `git status` lines that mention ya.make and, if there are any, a hint to
# run the generator script.
#
# FIXME: apparently sandbox (don't confuse it with docker) clones the sources
# using some ancient git version, <2.8, that contains a bug in submodule
# initialization [1]:
#
# " * A partial rewrite of "git submodule" in the 2.7 timeframe changed
# the way the gitdir: pointer in the submodules point at the real
# repository location to use absolute paths by accident. This has
# been corrected."
#
# [1]: https://github.com/git/git/blob/cf11a67975b057a144618badf16dc4e3d25b9407/Documentation/RelNotes/2.8.3.txt#L33-L36
#
# Because of that, "git status" will report the following error:
#
# fatal: not a git repository: /place/sandbox-data/tasks/0/2/882869720/ClickHouse/.git/modules/contrib/AMQP-CPP
#
# Anyway, this check does not require any submodule traversal, so it is fine to ignore those errors.
#
# -uno leaves untracked files out of the status output; git's stderr is redirected
# into the first grep through process substitution.
# NOTE(review): that grep has no -v, i.e. it prints the lines matching the known
# error rather than dropping them — confirm this is the intended direction.
git status -uno 2> >(grep "fatal: not a git repository: /place/sandbox-data/tasks/.*/ClickHouse/\\.git/modules/contrib") | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh"
2020-10-10 18:37:02 +00:00
# Check that every header file has #pragma once in first line
# (the first line must be exactly `#pragma once`).
find "$ROOT_PATH"/{src,programs,utils} -name '*.h' |
    grep -vP "$EXCLUDE_DIRS" |
    while IFS= read -r file; do
        [[ $(head -n1 "$file") != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"
    done
2020-10-11 16:26:11 +00:00
# Check for executable bit on non-executable files:
# list sources, queries, references and docs that are executable.
find "$ROOT_PATH"/{src,base,programs,utils,tests,docs,website,cmake} \
    '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' \
    -and -executable | grep -P '.' && echo "These files should not be executable."
2020-10-24 06:23:54 +00:00
# Check for BOM: list files that contain the byte sequence of a UTF-8, UTF-16LE or
# UTF-16BE byte order mark (grep -F looks for the bytes anywhere in the file).
find "$ROOT_PATH"/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs -d '\n' grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM"
find "$ROOT_PATH"/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs -d '\n' grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM"
find "$ROOT_PATH"/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs -d '\n' grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM"
2020-10-24 21:54:17 +00:00
# Too many exclamation marks: print every source line that contains '!!!'.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)."
2020-10-26 19:12:40 +00:00
# Trailing whitespaces: print every source line that ends with a space.
find "$ROOT_PATH"/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -P ' $' | grep -P '.' && echo "^ Trailing whitespaces."
2020-11-09 13:07:38 +00:00
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues.
# Lines that carry the STYLE_CHECK_ALLOW_STD_STRING_STREAM marker are not reported.
find "$ROOT_PATH"/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
    grep -vP "$EXCLUDE_DIRS" |
    xargs -d '\n' grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream"
# Conflict markers: print lines that consist solely of <<<<<<<, ======= or >>>>>>>.
find "$ROOT_PATH"/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' |
    xargs -d '\n' grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files"
2021-01-22 14:27:23 +00:00
# Forbid subprocess.check_call(...) in integration tests because it does not provide enough information on errors.
# Lines that carry the STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL marker are not reported.
find "$ROOT_PATH/tests/integration" -name '*.py' |
    xargs -d '\n' grep -F 'subprocess.check_call' | grep -v "STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL" && echo "Use helpers.cluster.run_and_check or subprocess.run instead of subprocess.check_call to print detailed info on error"