From 7f061ba89c5d598132c034cce980be714e9ef32c Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Sat, 9 Mar 2024 13:43:37 +0000
Subject: [PATCH 1/2] CI: add wf class in ci_config

 #do_not_test
---
 tests/ci/ci.py        |  2 +-
 tests/ci/ci_config.py | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 6b754787d5e..1eec9a6771b 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -1111,7 +1111,7 @@ def _configure_jobs(
     digests: Dict[str, str] = {}
 
     print("::group::Job Digests")
-    for job in CI_CONFIG.job_generator():
+    for job in CI_CONFIG.job_generator(pr_info.head_ref):
         digest = job_digester.get_job_digest(CI_CONFIG.get_digest_config(job))
         digests[job] = digest
         print(f"    job [{job.rjust(50)}] has digest [{digest}]")
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 44dea116cbe..df8bfb1c2a8 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -11,6 +11,14 @@ from ci_utils import WithIter
 from integration_test_images import IMAGES
 
 
+class WorkFlows(metaclass=WithIter):  # identifiers of the CI workflow kinds
+    PULL_REQUEST = "PULL_REQUEST"
+    MASTER = "MASTER"
+    BACKPORT = "BACKPORT"
+    RELEASE = "RELEASE"
+    SYNC = "SYNC"
+
+
 class CIStages(metaclass=WithIter):
     NA = "UNKNOWN"
     BUILDS_1 = "Builds_1"
@@ -694,10 +702,11 @@ class CIConfig:
         ), f"Invalid check_name or CI_CONFIG outdated, config not found for [{check_name}]"
         return res  # type: ignore
 
-    def job_generator(self) -> Iterable[str]:
+    def job_generator(self, branch: str) -> Iterable[str]:
         """
         traverses all check names in CI pipeline
         """
+        assert branch
         for config in (
             self.other_jobs_configs,
             self.build_config,

From 681bd28c877a365b8f18f6325c00e697ee83baef Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Sat, 9 Mar 2024 20:38:23 +0000
Subject: [PATCH 2/2] CI: make style check fast  #do_not_test

---
 docker/test/style/Dockerfile                  |  1 -
 tests/ci/style_check.py                       | 41 ++++++++++++++-----
 utils/check-style/check-pylint                | 15 +++++++
 utils/check-style/check-style                 | 10 -----
 utils/check-style/check_cpp_docs.sh           | 34 +++++++++++++++
 utils/check-style/check_py.sh                 | 17 ++++++++
 .../process_style_check_result.py             |  7 ++--
 7 files changed, 100 insertions(+), 25 deletions(-)
 create mode 100755 utils/check-style/check-pylint
 create mode 100755 utils/check-style/check_cpp_docs.sh
 create mode 100755 utils/check-style/check_py.sh
 rename {docker/test/style => utils/check-style}/process_style_check_result.py (96%)

diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile
index 122f558bab2..b4ffcfb597c 100644
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@@ -60,5 +60,4 @@ RUN arch=${TARGETARCH:-amd64} \
 
 
 COPY run.sh /
-COPY process_style_check_result.py /
 CMD ["/bin/bash", "/run.sh"]
diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py
index d004f3ed215..d0565e136d3 100644
--- a/tests/ci/style_check.py
+++ b/tests/ci/style_check.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import argparse
+from concurrent.futures import ProcessPoolExecutor
 import csv
 import logging
 import os
@@ -119,7 +120,7 @@ def checkout_last_ref(pr_info: PRInfo) -> None:
 def main():
     logging.basicConfig(level=logging.INFO)
     logging.getLogger("git_helper").setLevel(logging.DEBUG)
-    args = parse_args()
+    # args = parse_args()
 
     stopwatch = Stopwatch()
 
@@ -127,28 +128,46 @@ def main():
     temp_path = Path(TEMP_PATH)
     temp_path.mkdir(parents=True, exist_ok=True)
 
-    pr_info = PRInfo()
+    # pr_info = PRInfo()
 
     IMAGE_NAME = "clickhouse/style-test"
     image = pull_image(get_docker_image(IMAGE_NAME))
-    cmd = (
+    cmd_1 = (
         f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
         f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
-        f"{image}"
+        f"--entrypoint= -w/ClickHouse/utils/check-style "
+        f"{image} ./check_cpp_docs.sh"
     )
+    cmd_2 = (
+        f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
+        f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
+        f"--entrypoint= -w/ClickHouse/utils/check-style "
+        f"{image} ./check_py.sh"
+    )
+    logging.info("Is going to run the command: %s", cmd_1)
+    logging.info("Is going to run the command: %s", cmd_2)
 
-    if args.push:
-        checkout_head(pr_info)
+    with ProcessPoolExecutor(max_workers=2) as executor:
+        # Submit commands for execution in parallel
+        future1 = executor.submit(subprocess.run, cmd_1, shell=True)
+        future2 = executor.submit(subprocess.run, cmd_2, shell=True)
+        # Wait for both commands to complete
+        _ = future1.result()
+        _ = future2.result()
+
+    # if args.push:
+    #     checkout_head(pr_info)
 
-    logging.info("Is going to run the command: %s", cmd)
     subprocess.check_call(
-        cmd,
+        f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} "
+        f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || "
+        f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv',
         shell=True,
     )
 
-    if args.push:
-        commit_push_staged(pr_info)
-        checkout_last_ref(pr_info)
+    # if args.push:
+    #     commit_push_staged(pr_info)
+    #     checkout_last_ref(pr_info)
 
     state, description, test_results, additional_files = process_result(temp_path)
 
diff --git a/utils/check-style/check-pylint b/utils/check-style/check-pylint
new file mode 100755
index 00000000000..7959a414023
--- /dev/null
+++ b/utils/check-style/check-pylint
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# Run pylint in parallel over the Python scripts found under tests/.
+LC_ALL="en_US.UTF-8"
+ROOT_PATH=$(git rev-parse --show-toplevel)
+
+function xargs-pylint {
+  # stdin: "<path> <mime-type>" lines; $1 is the maximum number of arguments per pylint process
+  sort | awk '$2=="text/x-script.python" {print $1}' | \
+  xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n
+}
+# Files at most two levels deep under tests/, up to 50 per pylint process
+find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50
+# Beware, the lambdas are checked here. All of them contain `app`, and it causes brain-cucumber-zalgo (presumably why each file gets its own pylint process)
+find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1
diff --git a/utils/check-style/check-style b/utils/check-style/check-style
index 3a5d0c053ea..d7387c3f843 100755
--- a/utils/check-style/check-style
+++ b/utils/check-style/check-style
@@ -152,16 +152,6 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' |
     grep -vP $EXCLUDE_DIRS |
     xargs xmllint --noout --nonet
 
-function xargs-pylint {
-  # $1 is number maximum arguments per pylint process
-  sort | awk '$2=="text/x-script.python" {print $1}' | \
-  xargs -P "$(nproc)" -n "$1" pylint --rcfile="$ROOT_PATH/pyproject.toml" --persistent=no --score=n
-}
-
-find "$ROOT_PATH/tests" -maxdepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 50
-# Beware, there lambdas are checked. All of them contain `app`, and it causes brain-cucumber-zalgo
-find "$ROOT_PATH/tests/ci" -mindepth 2 -type f -exec file -F' ' --mime-type {} + | xargs-pylint 1
-
 find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f |
     grep -vP $EXCLUDE_DIRS |
     xargs yamllint --config-file=$ROOT_PATH/.yamllint
diff --git a/utils/check-style/check_cpp_docs.sh b/utils/check-style/check_cpp_docs.sh
new file mode 100755
index 00000000000..7ad3cede758
--- /dev/null
+++ b/utils/check-style/check_cpp_docs.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Runs the style, typos, docs spelling, whitespaces, workflows and submodules checks; each output is tee'd into /test_output.
+# yaml check is not the best one
+# None of the checks below can run outside the check-style directory, so record the failure and stop.
+cd /ClickHouse/utils/check-style || { echo -e "failure\tRepo not found" > /test_output/check_status.tsv; exit 1; }
+
+# FIXME: 30 sec to wait
+# echo "Check duplicates" | ts
+# ./check-duplicate-includes.sh |& tee /test_output/duplicate_includes_output.txt
+
+echo "Check style" | ts
+./check-style -n              |& tee /test_output/style_output.txt
+echo "Check typos" | ts
+./check-typos                 |& tee /test_output/typos_output.txt
+echo "Check docs spelling" | ts
+./check-doc-aspell            |& tee /test_output/docs_spelling_output.txt
+echo "Check whitespaces" | ts
+./check-whitespaces -n        |& tee /test_output/whitespaces_output.txt
+echo "Check workflows" | ts
+./check-workflows             |& tee /test_output/workflows_output.txt
+echo "Check submodules" | ts
+./check-submodules            |& tee /test_output/submodules_output.txt
+echo "Check style. Done" | ts
+
+# FIXME: 6 min to wait
+# echo "Check shell scripts with shellcheck" | ts
+# ./shellcheck-run.sh           |& tee /test_output/shellcheck_output.txt
+
+
+# FIXME: move out
+# /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
+# echo "Check help for changelog generator works" | ts
+# cd ../changelog || exit 1
+# ./changelog.py -h 2>/dev/null 1>&2
diff --git a/utils/check-style/check_py.sh b/utils/check-style/check_py.sh
new file mode 100755
index 00000000000..48c02013734
--- /dev/null
+++ b/utils/check-style/check_py.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Runs the Python checks (pylint, mypy); each check's output is tee'd into /test_output.
+# None of the checks below can run outside the check-style directory, so record the failure and stop.
+cd /ClickHouse/utils/check-style || { echo -e "failure\tRepo not found" > /test_output/check_status.tsv; exit 1; }
+
+# FIXME: 1 min to wait + head checkout
+# echo "Check python formatting with black" | ts
+# ./check-black -n              |& tee /test_output/black_output.txt
+
+echo "Check pylint" | ts
+./check-pylint -n               |& tee /test_output/pylint_output.txt
+echo "Check pylint. Done" | ts
+
+echo "Check python type hinting with mypy" | ts
+./check-mypy -n               |& tee /test_output/mypy_output.txt
+echo "Check python type hinting with mypy. Done" | ts
diff --git a/docker/test/style/process_style_check_result.py b/utils/check-style/process_style_check_result.py
similarity index 96%
rename from docker/test/style/process_style_check_result.py
rename to utils/check-style/process_style_check_result.py
index bc06df1af31..7980c01dd37 100755
--- a/docker/test/style/process_style_check_result.py
+++ b/utils/check-style/process_style_check_result.py
@@ -13,10 +13,11 @@ def process_result(result_folder):
     description = ""
     test_results = []
     checks = (
-        "duplicate includes",
-        "shellcheck",
+        #"duplicate includes",
+        #"shellcheck",
         "style",
-        "black",
+        "pylint",
+        #"black",
         "mypy",
         "typos",
         "whitespaces",