Merge pull request #55476 from ClickHouse/ci_scripts_tuning

Integration  check script fix ups
This commit is contained in:
Mikhail f. Shiryaev 2023-10-16 17:06:19 +02:00 committed by GitHub
commit f96bda1deb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 102 additions and 36 deletions

View File

@ -67,6 +67,48 @@ This check means that the CI system started to process the pull request. When it
Performs some simple regex-based checks of code style, using the [`utils/check-style/check-style`](https://github.com/ClickHouse/ClickHouse/blob/master/utils/check-style/check-style) binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](style.md).
#### Running style check locally:
```sh
mkdir -p /tmp/test_output
# running all checks
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
# run specified check script (e.g.: ./check-mypy)
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy
# find all style check scripts under the directory:
cd ./utils/check-style
# Check duplicate includes
./check-duplicate-includes.sh
# Check c++ formatiing
./check-style
# Check python formatting with black
./check-black
# Check python type hinting with mypy
./check-mypy
# Check code with codespell
./check-typos
# Check docs spelling
./check-doc-aspell
# Check whitespaces
./check-whitespaces
# Check github actions workflows
./check-workflows
# Check submodules
./check-submodules
# Check shell scripts with shellcheck
./shellcheck-run.sh
```
## Fast Test
Normally this is the first check that is ran for a PR. It builds ClickHouse and
@ -75,6 +117,15 @@ some. If it fails, further checks are not started until it is fixed. Look at
the report to see which tests fail, then reproduce the failure locally as
described [here](tests.md#functional-test-locally).
#### Running Fast Test locally:
```sh
mkdir -p /tmp/test_output
mkdir -p /tmp/fasttest-workspace
cd ClickHouse
# this docker command performs minimal ClickHouse build and run FastTests against it
docker run --rm --cap-add=SYS_PTRACE -u $(id -u ${USER}):$(id -g ${USER}) --network=host -e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output -e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE -e stage=clone_submodules --volume=/tmp/fasttest-workspace:/fasttest-workspace --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output clickhouse/fasttest
```
#### Status Page Files
- `runlog.out.log` is the general log that includes all other logs.
@ -122,6 +173,13 @@ Builds ClickHouse in various configurations for use in further steps. You have t
## Special Build Check
Performs static analysis and code style checks using `clang-tidy`. The report is similar to the [build check](#build-check). Fix the errors found in the build log.
#### Running clang-tidy locally:
There is a convenience `packager` script that runs the clang-tidy build in docker
```sh
mkdir build_tidy
./docker/packager/packager --output-dir=./build_tidy --package-type=binary --compiler=clang-17 --debug-build --clang-tidy
```
## Functional Stateless Tests
Runs [stateless functional tests](tests.md#functional-tests) for ClickHouse

1
tests/README.md Normal file
View File

@ -0,0 +1 @@
Find CI documents and instructions on running CI checks localy [here](https://clickhouse.com/docs/en/development/continuous-integration).

View File

@ -51,9 +51,9 @@ def get_gh_api(
sleep: int = 3,
**kwargs: Any,
) -> requests.Response:
"""It's a wrapper around get_with_retries that requests GH api w/o auth by
default, and falls back to the get_best_robot_token in case of receiving
"403 rate limit exceeded" error
"""
Request GH api w/o auth by default, and failover to the get_best_robot_token in case of receiving
"403 rate limit exceeded" or "404 not found" error
It sets auth automatically when ROBOT_TOKEN is already set by get_best_robot_token
"""
@ -71,27 +71,39 @@ def get_gh_api(
if grt.ROBOT_TOKEN is not None:
set_auth_header()
need_retry = False
for _ in range(retries):
token_is_set = "Authorization" in kwargs.get("headers", {})
exc = Exception("A placeholder to satisfy typing and avoid nesting")
try_cnt = 0
while try_cnt < retries:
try_cnt += 1
try:
response = get_with_retries(url, 1, sleep, **kwargs)
response = requests.get(url, **kwargs)
response.raise_for_status()
return response
except requests.HTTPError as exc:
if (
exc.response.status_code == 403
except requests.HTTPError as e:
exc = e
ratelimit_exceeded = (
e.response.status_code == 403
and b"rate limit exceeded"
in exc.response._content # pylint:disable=protected-access
):
in e.response._content # pylint:disable=protected-access
)
try_auth = e.response.status_code == 404
if (ratelimit_exceeded or try_auth) and not token_is_set:
logging.warning(
"Received rate limit exception, setting the auth header and retry"
)
set_auth_header()
need_retry = True
break
token_is_set = True
try_cnt = 0
continue
except Exception as e:
exc = e
if need_retry:
return get_with_retries(url, retries, sleep, **kwargs)
if try_cnt < retries:
logging.info("Exception '%s' while getting, retry %i", exc, try_cnt)
time.sleep(sleep)
raise exc
def get_build_name_for_check(check_name: str) -> str:

View File

@ -4,8 +4,6 @@ import logging
import os
from typing import Dict, List, Set, Union, Literal
from unidiff import PatchSet # type: ignore
from build_download_helper import get_gh_api
from env_helper import (
GITHUB_REPOSITORY,
@ -171,7 +169,11 @@ class PRInfo:
response_json = user_orgs_response.json()
self.user_orgs = set(org["id"] for org in response_json)
self.diff_urls.append(github_event["pull_request"]["diff_url"])
self.diff_urls.append(
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"compare/master...{self.head_ref}"
)
elif "commits" in github_event:
# `head_commit` always comes with `commits`
commit_message = github_event["head_commit"]["message"] # type: str
@ -215,12 +217,12 @@ class PRInfo:
# files changed in upstream AND master...{self.head_ref}
# to get files, changed in current HEAD
self.diff_urls.append(
f"https://github.com/{GITHUB_REPOSITORY}/"
f"compare/master...{self.head_ref}.diff"
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"compare/master...{self.head_ref}"
)
self.diff_urls.append(
f"https://github.com/{GITHUB_REPOSITORY}/"
f"compare/{self.head_ref}...master.diff"
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"compare/{self.head_ref}...master"
)
# Get release PR number.
self.release_pr = get_pr_for_commit(self.base_ref, self.base_ref)[
@ -232,8 +234,8 @@ class PRInfo:
# For release PRs we must get not only files changed in the PR
# itself, but as well files changed since we branched out
self.diff_urls.append(
f"https://github.com/{GITHUB_REPOSITORY}/"
f"compare/{self.head_ref}...master.diff"
f"https://api.github.com/repos/{GITHUB_REPOSITORY}/"
f"compare/{self.head_ref}...master"
)
else:
print("event.json does not match pull_request or push:")
@ -261,19 +263,11 @@ class PRInfo:
raise TypeError("The event does not have diff URLs")
for diff_url in self.diff_urls:
response = get_gh_api(
diff_url,
sleep=RETRY_SLEEP,
)
response = get_gh_api(diff_url, sleep=RETRY_SLEEP)
response.raise_for_status()
if "commits" in self.event and self.number == 0:
diff = response.json()
if "files" in diff:
self.changed_files = {f["filename"] for f in diff["files"]}
else:
diff_object = PatchSet(response.text)
self.changed_files.update({f.path for f in diff_object})
print(f"Fetched info about {len(self.changed_files)} changed files")
def get_dict(self):

View File

@ -90,6 +90,7 @@ systemctl restart docker
sudo -u ubuntu docker buildx version
sudo -u ubuntu docker buildx create --use --name default-builder
# FIXME: remove unidiff as soon as no old PRs could use it, here and in Dockerfile
pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory
mkdir -p $RUNNER_HOME && cd $RUNNER_HOME