ClickHouse/tests/ci/style_check.py

#!/usr/bin/env python3
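"""Style-check CI job for the ClickHouse repository.

Runs the docs, C++, Python and shell style checks inside the
clickhouse/style-test docker image, collects check_status.tsv and
test_results.tsv from the output directory, optionally pushes automatic
isort/black fixes back to the PR branch, and dumps a JobReport for CI.

A typical invocation, assuming the CI environment variables (REPO_COPY,
TEMP_PATH, GITHUB_EVENT_PATH) are set:

    ./style_check.py --no-push
"""
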
import argparse
import csv
import logging
import os
import shutil
import subprocess
import sys
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path
from typing import List, Tuple, Union

import magic
from docker_images_helper import get_docker_image, pull_image
from env_helper import GITHUB_EVENT_PATH, IS_CI, REPO_COPY, TEMP_PATH
from git_helper import GIT_PREFIX, git_runner
from pr_info import PRInfo
from report import (
    ERROR,
    FAIL,
    FAILURE,
    SUCCESS,
    JobReport,
    TestResults,
    read_test_results,
)
from ssh import SSHKey
from stopwatch import Stopwatch


def process_result(
    result_directory: Path,
) -> Tuple[str, str, TestResults, List[Path]]:
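    """Collect the style-check output from result_directory.

    Reads check_status.tsv (a single "state<TAB>description" row) and
    test_results.tsv and returns (state, description, test_results,
    additional_files). A malformed status file yields ERROR, and a SUCCESS
    state is downgraded to ERROR if test_results.tsv is empty or unreadable.
    """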
    test_results = []  # type: TestResults
    additional_files = []
    # Just upload all files from result_directory.
    # If the task provides processed results, then it's responsible
    # for the content of result_directory.
    if result_directory.exists():
        additional_files = [p for p in result_directory.iterdir() if p.is_file()]

    status = []
    status_path = result_directory / "check_status.tsv"
    if status_path.exists():
        logging.info("Found check_status.tsv")
        with open(status_path, "r", encoding="utf-8") as status_file:
            status = list(csv.reader(status_file, delimiter="\t"))

    if len(status) != 1 or len(status[0]) != 2:
        logging.info("Files in result folder %s", os.listdir(result_directory))
        return ERROR, "Invalid check_status.tsv", test_results, additional_files
    state, description = status[0][0], status[0][1]

    try:
        results_path = result_directory / "test_results.tsv"
        test_results = read_test_results(results_path)
        if len(test_results) == 0:
            raise ValueError("Empty results")

        return state, description, test_results, additional_files
    except Exception:
        if state == SUCCESS:
            state, description = ERROR, "Failed to read test_results.tsv"
        return state, description, test_results, additional_files


def parse_args():
    parser = argparse.ArgumentParser("Check and report style issues in the repository")
    parser.add_argument("--push", default=True, help=argparse.SUPPRESS)
    parser.add_argument(
        "--no-push",
        action="store_false",
        dest="push",
        help="do not commit and push automatic fixes",
        default=argparse.SUPPRESS,
    )
    return parser.parse_args()


def commit_push_staged(pr_info: PRInfo) -> None:
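    """Commit the staged automatic fixes and push them on top of the PR head.

    The fix commit is cherry-picked onto the PR's real head so that the
    temporary merge commit of the `pull_request` event is not included.
    """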
    # It works ONLY for PRs, and only over ssh, so either
    # ROBOT_CLICKHOUSE_SSH_KEY should be set or ssh-agent should work
    assert pr_info.number
    if not pr_info.head_name == pr_info.base_name:
        # We can't push to forks, sorry folks
        return
    git_staged = git_runner("git diff --cached --name-only")
    if not git_staged:
        logging.info("No fixes are staged")
        return

    def push_fix() -> None:
        """
        Commit the staged changes and cherry-pick the fix commit on top of the
        PR's head. The `pull_request` event runs on a temporary merge_commit,
        so we need to avoid including it in the autofix.
        """
        remote_url = pr_info.event["pull_request"]["base"]["repo"]["ssh_url"]
        head = pr_info.sha
        git_runner(f"{GIT_PREFIX} commit -m 'Automatic style fix'")
        fix_commit = git_runner("git rev-parse HEAD")
        logging.info(
            "Fetching PR's head, check it out and cherry-pick autofix: %s", head
        )
        git_runner(
            f"{GIT_PREFIX} fetch {remote_url} --no-recurse-submodules --depth=1 {head}"
        )
        git_runner(f"git reset --hard {head}")
        git_runner(f"{GIT_PREFIX} cherry-pick {fix_commit}")
        git_runner(f"{GIT_PREFIX} push {remote_url} HEAD:{pr_info.head_ref}")

    if os.getenv("ROBOT_CLICKHOUSE_SSH_KEY", ""):
        with SSHKey("ROBOT_CLICKHOUSE_SSH_KEY"):
            push_fix()
        return

    push_fix()


def _check_mime(file: Union[Path, str], mime: str) -> bool:
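    """Return True if libmagic reports `mime` for `file` (relative to REPO_COPY).

    Directories (e.g. submodules) and files removed in the PR are treated
    as a non-match instead of raising.
    """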
    # WARNING: python-magic v2:0.4.24-2 is used in ubuntu 22.04,
    # and `Support os.PathLike values in magic.from_file` is only from 0.4.25
    try:
        return bool(magic.from_file(os.path.join(REPO_COPY, file), mime=True) == mime)
    except (IsADirectoryError, FileNotFoundError) as e:
        # Process submodules and removed files w/o errors
        logging.warning("Captured error on file '%s': %s", file, e)
        return False


def is_python(file: Union[Path, str]) -> bool:
"""returns if the changed file in the repository is python script"""
return (
_check_mime(file, "text/x-script.python")
or str(file).endswith(".py")
or str(file) == "pyproject.toml"
)
def is_shell(file: Union[Path, str]) -> bool:
    """Return True if the changed file in the repository is a shell script."""
    return _check_mime(file, "text/x-shellscript") or str(file).endswith(".sh")


def main():
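    """Run the style checks and publish the result as a JobReport.

    The docs check always runs; in CI, for a PR, the C++, Python and shell
    checks run only when the PR changes matching files. The raw check output
    is post-processed into test_results.tsv/check_status.tsv, and an autofix
    is pushed only when --push is enabled and nothing but isort/black failed.
    """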
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("git_helper").setLevel(logging.DEBUG)
    args = parse_args()
    stopwatch = Stopwatch()

    repo_path = Path(REPO_COPY)
    temp_path = Path(TEMP_PATH)
    if temp_path.is_dir():
        shutil.rmtree(temp_path)
    temp_path.mkdir(parents=True, exist_ok=True)

    pr_info = PRInfo()

    if pr_info.is_merge_queue and args.push:
        print("Auto style fix will be disabled for Merge Queue workflow")
        args.push = False

    run_cpp_check = True
    run_shell_check = True
    run_python_check = True
    if IS_CI and pr_info.number > 0:
        pr_info.fetch_changed_files()
        run_cpp_check = any(
            not (is_python(file) or is_shell(file)) for file in pr_info.changed_files
        )
        run_shell_check = any(is_shell(file) for file in pr_info.changed_files)
        run_python_check = any(is_python(file) for file in pr_info.changed_files)

    IMAGE_NAME = "clickhouse/style-test"
    image = pull_image(get_docker_image(IMAGE_NAME))

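    # All checks run inside the style-test image: the repository is mounted at
    # /ClickHouse and results are written to /test_output (the local temp_path).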
    docker_command = (
        f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE "
        f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output "
        f"--entrypoint= -w/ClickHouse/utils/check-style {image}"
    )

    cmd_docs = f"{docker_command} ./check_docs.sh"
    cmd_cpp = f"{docker_command} ./check_cpp.sh"
    cmd_py = f"{docker_command} ./check_py.sh"
    cmd_shell = f"{docker_command} ./check_shell.sh"

    with ProcessPoolExecutor(max_workers=2) as executor:
        logging.info("Run docs files check: %s", cmd_docs)
        future = executor.submit(subprocess.run, cmd_docs, shell=True)
        # Parallelization does not make it faster - run the checks sequentially
        _ = future.result()

        if run_cpp_check:
            logging.info("Run source files check: %s", cmd_cpp)
            future = executor.submit(subprocess.run, cmd_cpp, shell=True)
            _ = future.result()

        if run_python_check:
            logging.info("Run py files check: %s", cmd_py)
            future = executor.submit(subprocess.run, cmd_py, shell=True)
            _ = future.result()

        if run_shell_check:
            logging.info("Run shellcheck check: %s", cmd_shell)
            future = executor.submit(subprocess.run, cmd_shell, shell=True)
            _ = future.result()

    subprocess.check_call(
        f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} "
        f"--out-results-file {temp_path}/test_results.tsv --out-status-file {temp_path}/check_status.tsv || "
        f'echo -e "failure\tCannot parse results" > {temp_path}/check_status.tsv',
        shell=True,
    )

    state, description, test_results, additional_files = process_result(temp_path)

    autofix_description = ""
    push_fix = args.push
    for result in test_results:
        if result.status in (FAILURE, FAIL) and push_fix:
            # do not autofix if something besides isort and black has failed
            push_fix = any(result.name.endswith(check) for check in ("isort", "black"))

    if push_fix:
        try:
            commit_push_staged(pr_info)
        except subprocess.SubprocessError:
            # do not fail the whole script if the autofix didn't work out
            logging.error("Unable to push the autofix. Continue.")
            autofix_description = "Failed to push autofix to the PR. "

    JobReport(
        description=f"{autofix_description}{description}",
        test_results=test_results,
        status=state,
        start_time=stopwatch.start_time_str,
        duration=stopwatch.duration_seconds,
        # add the GITHUB_EVENT_PATH json file to the style check report;
        # sometimes it's needed for debugging
        additional_files=additional_files + [Path(GITHUB_EVENT_PATH)],
    ).dump()

    if state in [ERROR, FAILURE]:
        print(f"Style check failed: [{description}]")
        sys.exit(1)


if __name__ == "__main__":
    main()