Merge pull request #61125 from ClickHouse/ci_fast_style

CI: make style check fast
2024-11-28 10:31:57 +00:00 · 2024-03-10 08:17:23 +04:00 · 2024-03-10 08:17:23 +04:00 · c240c33037
commit c240c33037
parent ce6d635fde 681bd28c87
7 changed files with 100 additions and 25 deletions
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -60,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \
 COPY run.sh /
 COPY process_style_check_result.py /
 CMD ["/bin/bash", "/run.sh"]
--- a/tests/ci/style_check.py
+++ b/tests/ci/style_check.py
@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import argparse
 from concurrent.futures import ProcessPoolExecutor
 import csv
 import logging
 import os
@ -119,7 +120,7 @@ def checkout_last_ref(pr_info: PRInfo) -> None:
 def main():
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("git_helper").setLevel(logging.DEBUG)
-    args = parse_args()
+    # args = parse_args()
    stopwatch = Stopwatch()
@ -127,28 +128,46 @@ def main():
    temp_path = Path(TEMP_PATH)
    temp_path.mkdir(parents=True, exist_ok=True)
-    pr_info = PRInfo()
+    # pr_info = PRInfo()
    IMAGE_NAME = "clickhouse/style-test"
    image = pull_image(get_docker_image(IMAGE_NAME))
-    cmd = (
+    cmd_1 = (
        f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
        f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
-        f"{image}"
+        f"--entrypoint= -w/ClickHouse/utils/check-style "
        f"{image} ./check_cpp_docs.sh"
    )
    cmd_2 = (
        f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
        f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
        f"--entrypoint= -w/ClickHouse/utils/check-style "
        f"{image} ./check_py.sh"
    )
    logging.info("Is going to run the command: %s", cmd_1)
    logging.info("Is going to run the command: %s", cmd_2)
-    if args.push:
+    with ProcessPoolExecutor(max_workers=2) as executor:
-        checkout_head(pr_info)
+        # Submit commands for execution in parallel
        future1 = executor.submit(subprocess.run, cmd_1, shell=True)
        future2 = executor.submit(subprocess.run, cmd_2, shell=True)
        # Wait for both commands to complete
        _ = future1.result()
        _ = future2.result()
    # if args.push:
    #     checkout_head(pr_info)
    logging.info("Is going to run the command: %s", cmd)
    subprocess.check_call(
-        cmd,
+        f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} "
        f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || "
        f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv',
        shell=True,
    )
-    if args.push:
+    # if args.push:
-        commit_push_staged(pr_info)
+    #     commit_push_staged(pr_info)
-        checkout_last_ref(pr_info)
+    #     checkout_last_ref(pr_info)
    state, description, test_results, additional_files = process_result(temp_path)
--- a/utils/check-style/check-pylint
+++ b/utils/check-style/check-pylint
@ -0,0 +1,15 @@
 #!/usr/bin/env bash
 # Runs pylint over files under "$ROOT_PATH/tests" that `file` reports as Python scripts.
 # NOTE(review): LC_ALL is assigned without `export`, so child processes (e.g. `sort`)
 # only see it if LC_ALL is already present in the environment - confirm this is intended.
 LC_ALL="en_US.UTF-8"
 # Top-level directory of the git work tree that contains the current directory.
 ROOT_PATH=$(git rev-parse --show-toplevel)
 # Reads "<path> <mime-type>" lines from stdin, keeps the paths whose type is
 # text/x-script.python, and hands them to pylint through xargs: at most $1 paths
 # per pylint invocation, with up to $(nproc) invocations running in parallel.
 function xargs-pylint {
  # $1 is the maximum number of arguments per pylint process
  sort | awk '$2=="text/x-script.python" {print $1}' | \
  xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n
 }
 # Files at most two levels below tests/: up to 50 files per pylint process.
 find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50
 # Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo
 # Files at least two levels below tests/ci: one file per pylint process.
 find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@ -152,16 +152,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
    grep -vP $EXCLUDE_DIRS |
    xargs xmllint --noout --nonet
 # Reads "<path> <mime-type>" lines from stdin, keeps the paths whose type is
 # text/x-script.python, and runs pylint on them through xargs with up to
 # $(nproc) processes in parallel.
 function xargs-pylint {
  # $1 is the maximum number of arguments per pylint process
  sort | awk '$2=="text/x-script.python" {print $1}' | \
  xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n
 }
 # Files at most two levels below tests/: up to 50 files per pylint process.
 find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50
 # Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo
 # Files at least two levels below tests/ci: one file per pylint process.
 find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1
 # Run yamllint on every *.yaml / *.yml file whose path is not under $ROOT_PATH/contrib*,
 # after dropping paths that match $EXCLUDE_DIRS.
 find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f |
    grep -vP $EXCLUDE_DIRS |
    xargs yamllint --config-file=$ROOT_PATH/.yamllint
--- a/utils/check-style/check_cpp_docs.sh
+++ b/utils/check-style/check_cpp_docs.sh
@ -0,0 +1,34 @@
 #!/bin/bash
 # Runs check-style, check-typos, check-doc-aspell, check-whitespaces, check-workflows
 # and check-submodules one after another. Each tool's stdout and stderr (`|&`) are
 # echoed and also saved by `tee` to a per-check file under /test_output.
 # Progress messages are piped through `ts` (presumably to timestamp them - confirm).
 # yaml check is not the best one
 # NOTE(review): if the cd fails, a failure status is written to check_status.tsv but
 # the script does not exit, so the checks below are still attempted - confirm intent.
 cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
 # FIXME: 30 sec to wait
 # echo "Check duplicates" | ts
 # ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt
 echo "Check style" | ts
 ./check-style -n              |& tee /test_output/style_output.txt
 echo "Check typos" | ts
 ./check-typos                 |& tee /test_output/typos_output.txt
 echo "Check docs spelling" | ts
 ./check-doc-aspell            |& tee /test_output/docs_spelling_output.txt
 echo "Check whitespaces" | ts
 ./check-whitespaces -n        |& tee /test_output/whitespaces_output.txt
 echo "Check workflows" | ts
 ./check-workflows             |& tee /test_output/workflows_output.txt
 echo "Check submodules" | ts
 ./check-submodules            |& tee /test_output/submodules_output.txt
 echo "Check style. Done" | ts
 # FIXME: 6 min to wait
 # echo "Check shell scripts with shellcheck" | ts
 # ./shellcheck-run.sh           |& tee /test_output/shellcheck_output.txt
 # FIXME: move out
 # /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
 # echo "Check help for changelog generator works" | ts
 # cd ../changelog || exit 1
 # ./changelog.py -h 2>/dev/null 1>&2
--- a/utils/check-style/check_py.sh
+++ b/utils/check-style/check_py.sh
@ -0,0 +1,17 @@
 #!/bin/bash
 # Runs the Python checks (check-pylint, then check-mypy). Each tool's stdout and
 # stderr (`|&`) are echoed and also saved by `tee` to a file under /test_output.
 # Progress messages are piped through `ts` (presumably to timestamp them - confirm).
 # yaml check is not the best one
 # NOTE(review): if the cd fails, a failure status is written to check_status.tsv but
 # the script does not exit, so the checks below are still attempted - confirm intent.
 cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
 # FIXME: 1 min to wait + head checkout
 # echo "Check python formatting with black" | ts
 # ./check-black -n              |& tee /test_output/black_output.txt
 echo "Check pylint" | ts
 # NOTE(review): -n is passed here, but the check-pylint script added in this change
 # does not appear to read its arguments - verify whether the flag is needed.
 ./check-pylint -n               |& tee /test_output/pylint_output.txt
 echo "Check pylint. Done" | ts
 echo "Check python type hinting with mypy" | ts
 ./check-mypy -n               |& tee /test_output/mypy_output.txt
 echo "Check python type hinting with mypy. Done" | ts
--- a/utils/check-style/process_style_check_result.py
+++ b/utils/check-style/process_style_check_result.py
@ -13,10 +13,11 @@ def process_result(result_folder):
    description = ""
    test_results = []
    checks = (
-        "duplicate includes",
+        #"duplicate includes",
-        "shellcheck",
+        #"shellcheck",
        "style",
-        "black",
+        "pylint",
        #"black",
        "mypy",
        "typos",
        "whitespaces",