Merge pull request #49701 from ClickHouse/fix-browser

Improve woboq codebrowser pipeline
commit b1fd1d3ae6 by Mikhail f. Shiryaev, 2023-05-12 19:45:17 +02:00 (committed by GitHub)
7 changed files with 154 additions and 60 deletions

View File

@@ -72,6 +72,9 @@ jobs:
         with:
           name: changed_images
           path: ${{ runner.temp }}/changed_images.json
+  Codebrowser:
+    needs: [DockerHubPush]
+    uses: ./.github/workflows/woboq.yml
   BuilderCoverity:
     needs: DockerHubPush
     runs-on: [self-hosted, builder]

View File

@@ -6,9 +6,8 @@ env:
 concurrency:
   group: woboq
 on: # yamllint disable-line rule:truthy
-  schedule:
-    - cron: '0 */18 * * *'
   workflow_dispatch:
+  workflow_call:
 jobs:
   # don't use dockerhub push because this image updates so rarely
   WoboqCodebrowser:
@@ -26,6 +25,10 @@ jobs:
         with:
           clear-repository: true
           submodules: 'true'
+      - name: Download json reports
+        uses: actions/download-artifact@v3
+        with:
+          path: ${{ env.IMAGES_PATH }}
       - name: Codebrowser
         run: |
           sudo rm -fr "$TEMP_PATH"

View File

@@ -20,26 +20,11 @@ RUN arch=${TARGETARCH:-amd64} \
 # repo versions doesn't work correctly with C++17
 # also we push reports to s3, so we add index.html to subfolder urls
-# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
-RUN git clone --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
+# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
+RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
     && cd /woboq_codebrowser \
     && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
     && ninja
-ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
-ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
-ENV STATIC_DATA=/woboq_codebrowser/data
-ENV SOURCE_DIRECTORY=/repo_folder
-ENV BUILD_DIRECTORY=/build
-ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report
-ENV SHA=nosha
-ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
-CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
-    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
-    mkdir -p $HTML_RESULT_DIRECTORY && \
-    $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
-    cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ && \
-    $CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \
-    mv $HTML_RESULT_DIRECTORY /test_output
+COPY build.sh /
+
+CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+set -x -e
+
+STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data}
+SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build}
+BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build}
+OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output}
+HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report}
+SHA=${SHA:-nosha}
+DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data}
+nproc=$(($(nproc) + 2)) # increase parallelism
+
+read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
+
+mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
+cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
+mkdir -p "$HTML_RESULT_DIRECTORY"
+echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
+/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
+    -o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \
+    -d "$DATA" \
+    |& ts '%Y-%m-%d %H:%M:%S' \
+    | tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \
+    | grep --line-buffered -v ':[0-9]* Error: '
+
+cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/"
+/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \
+    -d "$DATA" |& ts '%Y-%m-%d %H:%M:%S'
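The generator stage in this new build.sh keeps the complete output in codebrowser_generator.log while hiding the per-file ":NNN Error:" noise from the console. For illustration only, a rough Python equivalent of that tee-plus-filter pipeline; the file name and pattern are copied from the script, but this helper itself is not part of the PR:

    #!/usr/bin/env python3
    """Hypothetical stand-in for `tee ... | grep -v ':[0-9]* Error: '` above."""
    import re
    import sys

    NOISY = re.compile(r":\d+ Error: ")

    with open("codebrowser_generator.log", "w", encoding="utf-8") as full_log:
        for line in sys.stdin:
            full_log.write(line)        # keep every line in the full log
            if not NOISY.search(line):
                sys.stdout.write(line)  # only the quiet lines reach the console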

View File

@@ -1,18 +1,19 @@
 #!/usr/bin/env python3

-import os
-import subprocess
 import logging
+import os
+from pathlib import Path

 from github import Github

 from commit_status_helper import get_commit, post_commit_status
-from docker_pull_helper import get_image_with_version
+from docker_pull_helper import get_image_with_version, DockerImage
 from env_helper import (
     IMAGES_PATH,
     REPO_COPY,
     S3_DOWNLOAD,
+    S3_BUILDS_BUCKET,
     S3_TEST_REPORTS_BUCKET,
     TEMP_PATH,
 )
@@ -27,16 +28,24 @@ from upload_result_helper import upload_results

 NAME = "Woboq Build"

-def get_run_command(repo_path, output_path, image):
+def get_run_command(
+    repo_path: Path, output_path: Path, image: DockerImage, sha: str
+) -> str:
+    user = f"{os.geteuid()}:{os.getegid()}"
     cmd = (
-        "docker run " + f"--volume={repo_path}:/repo_folder "
-        f"--volume={output_path}:/test_output "
-        f"-e 'DATA={S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/data' {image}"
+        f"docker run --rm --user={user} --volume={repo_path}:/build "
+        f"--volume={output_path}:/workdir/output --network=host "
+        # use sccache, https://github.com/KDAB/codebrowser/issues/111
+        f"-e SCCACHE_BUCKET='{S3_BUILDS_BUCKET}' "
+        "-e SCCACHE_S3_KEY_PREFIX=ccache/sccache "
+        '-e CMAKE_FLAGS="$CMAKE_FLAGS -DCOMPILER_CACHE=sccache" '
+        f"-e 'DATA={S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/data' "
+        f"-e SHA={sha} {image}"
     )
     return cmd

-if __name__ == "__main__":
+def main():
     logging.basicConfig(level=logging.INFO)

     stopwatch = Stopwatch()
@@ -44,48 +53,83 @@ if __name__ == "__main__":
     gh = Github(get_best_robot_token(), per_page=100)
     pr_info = PRInfo()
     commit = get_commit(gh, pr_info.sha)

+    temp_path = Path(TEMP_PATH)
-    if not os.path.exists(TEMP_PATH):
-        os.makedirs(TEMP_PATH)
+    if not temp_path.exists():
+        os.makedirs(temp_path)

     docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser")
     s3_helper = S3Helper()

-    result_path = os.path.join(TEMP_PATH, "result_path")
-    if not os.path.exists(result_path):
+    result_path = temp_path / "result_path"
+    if not result_path.exists():
         os.makedirs(result_path)

-    run_command = get_run_command(REPO_COPY, result_path, docker_image)
+    run_command = get_run_command(
+        Path(REPO_COPY), result_path, docker_image, pr_info.sha[:12]
+    )
     logging.info("Going to run codebrowser: %s", run_command)

-    run_log_path = os.path.join(TEMP_PATH, "run.log")
+    run_log_path = result_path / "run.log"
+    state = "success"

     with TeePopen(run_command, run_log_path) as process:
         retcode = process.wait()
         if retcode == 0:
             logging.info("Run successfully")
         else:
             logging.info("Run failed")
+            state = "failure"

-    subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {TEMP_PATH}", shell=True)

-    report_path = os.path.join(result_path, "html_report")
+    report_path = result_path / "html_report"
     logging.info("Report path %s", report_path)
     s3_path_prefix = "codebrowser"
-    html_urls = s3_helper.fast_parallel_upload_dir(
-        report_path, s3_path_prefix, "clickhouse-test-reports"
-    )
+    if state == "success":
+        _ = s3_helper.fast_parallel_upload_dir(
+            report_path, s3_path_prefix, S3_TEST_REPORTS_BUCKET
+        )

     index_html = (
-        '<a href="{S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/index.html">'
-        "HTML report</a>"
+        f'<a href="{S3_DOWNLOAD}/{S3_TEST_REPORTS_BUCKET}/codebrowser/index.html">'
+        "Generate codebrowser site</a>"
     )

-    test_result = TestResult(index_html, "Look at the report")
+    additional_logs = [path.absolute() for path in result_path.glob("*.log")]

-    report_url = upload_results(s3_helper, 0, pr_info.sha, [test_result], [], NAME)
+    test_results = [
+        TestResult(index_html, state, stopwatch.duration_seconds, additional_logs)
+    ]
+    # Check if the run log contains `FATAL Error:`, that means the code problem
+    stopwatch = Stopwatch()
+    fatal_error = "FATAL Error:"
+    logging.info("Search for '%s' in %s", fatal_error, run_log_path)
+    with open(run_log_path, "r", encoding="utf-8") as rlfd:
+        for line in rlfd.readlines():
+            if "FATAL Error:" in line:
+                logging.warning(
+                    "The line '%s' found, mark the run as failure", fatal_error
+                )
+                state = "failure"
+                test_results.append(
+                    TestResult(
+                        "Indexing error",
+                        state,
+                        stopwatch.duration_seconds,
+                        additional_logs,
+                    )
+                )
+                break
+
+    report_url = upload_results(
+        s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME
+    )

     print(f"::notice ::Report url: {report_url}")

-    post_commit_status(commit, "success", report_url, "Report built", NAME, pr_info)
+    post_commit_status(commit, state, report_url, "Report built", NAME, pr_info)
+
+
+if __name__ == "__main__":
+    main()
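The check script above scans run.log line by line for the indexer's "FATAL Error:" marker and flips the commit status to failure when it appears. The same scan, factored into a small standalone helper for clarity; the function name here is hypothetical and not part of this PR:

    from pathlib import Path


    def log_has_fatal_error(log_path: Path, marker: str = "FATAL Error:") -> bool:
        """Return True if any line of the run log contains the fatal marker."""
        with open(log_path, "r", encoding="utf-8") as log_file:
            return any(marker in line for line in log_file)


    # Hypothetical usage mirroring main():
    # if log_has_fatal_error(run_log_path):
    #     state = "failure"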

View File

@@ -1,3 +1,4 @@
+import logging
 import os
 from os import path as p
@@ -65,6 +66,32 @@ def GITHUB_JOB_ID() -> str:
         ):
             _GITHUB_JOB_ID = "0"

+    # FIXME: until it's here, we can't move to reusable workflows
+    if not _GITHUB_JOB_URL:
+        # This is a terrible workaround for the case of another broken part of
+        # GitHub actions. For nested workflows it doesn't provide a proper GITHUB_JOB
+        # value, but only the final one. So, for `OriginalJob / NestedJob / FinalJob`
+        # full name, GITHUB_JOB contains only FinalJob
+        matched_jobs = []
+        for job in jobs:
+            nested_parts = job["name"].split(" / ")
+            if len(nested_parts) <= 1:
+                continue
+            if nested_parts[-1] == GITHUB_JOB:
+                matched_jobs.append(job)
+        if len(matched_jobs) == 1:
+            # The best case scenario
+            _GITHUB_JOB_ID = matched_jobs[0]["id"]
+            _GITHUB_JOB_URL = matched_jobs[0]["html_url"]
+            return _GITHUB_JOB_ID
+        if matched_jobs:
+            logging.error(
+                "We could not get the ID and URL for the current job name %s, there "
+                "are more than one jobs match it for the nested workflows. Please, "
+                "refer to https://github.com/actions/runner/issues/2577",
+                GITHUB_JOB,
+            )
+
     return _GITHUB_JOB_ID
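The workaround above matches the bare GITHUB_JOB value against the last segment of each full nested-workflow job name. A minimal self-contained sketch of that matching rule; the job names, IDs, and URLs below are made-up examples, not data from this PR:

    # Standalone illustration of the `" / "` suffix matching above.
    GITHUB_JOB = "FinalJob"  # what GitHub reports for a nested workflow job
    jobs = [  # example payload; the real list comes from the Actions API
        {"name": "OriginalJob / NestedJob / FinalJob", "id": 42, "html_url": "https://example.invalid/42"},
        {"name": "UnrelatedJob", "id": 7, "html_url": "https://example.invalid/7"},
    ]

    matched_jobs = [
        job
        for job in jobs
        if len(job["name"].split(" / ")) > 1
        and job["name"].split(" / ")[-1] == GITHUB_JOB
    ]
    assert len(matched_jobs) == 1 and matched_jobs[0]["id"] == 42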

View File

@@ -6,8 +6,11 @@ import re
 import shutil
 import time
 from multiprocessing.dummy import Pool
+from pathlib import Path
+from typing import List, Union

 import boto3  # type: ignore
+import botocore  # type: ignore

 from env_helper import (
     S3_TEST_REPORTS_BUCKET,
@@ -40,9 +43,12 @@ def _flatten_list(lst):

 class S3Helper:
+    max_pool_size = 100
+
     def __init__(self):
+        config = botocore.config.Config(max_pool_connections=self.max_pool_size)
         self.session = boto3.session.Session(region_name="us-east-1")
-        self.client = self.session.client("s3", endpoint_url=S3_URL)
+        self.client = self.session.client("s3", endpoint_url=S3_URL, config=config)
         self.host = S3_URL
         self.download_host = S3_DOWNLOAD
@@ -124,7 +130,9 @@
         else:
             return S3Helper.copy_file_to_local(S3_BUILDS_BUCKET, file_path, s3_path)

-    def fast_parallel_upload_dir(self, dir_path, s3_dir_path, bucket_name):
+    def fast_parallel_upload_dir(
+        self, dir_path: Union[str, Path], s3_dir_path: str, bucket_name: str
+    ) -> List[str]:
         all_files = []
         for root, _, files in os.walk(dir_path):
@@ -137,12 +145,12 @@
         t = time.time()
         sum_time = 0

-        def upload_task(file_path):
+        def upload_task(file_path: str) -> str:
             nonlocal counter
             nonlocal t
             nonlocal sum_time
             try:
-                s3_path = file_path.replace(dir_path, s3_dir_path)
+                s3_path = file_path.replace(str(dir_path), s3_dir_path)
                 metadata = {}
                 if s3_path.endswith("html"):
                     metadata["ContentType"] = "text/html; charset=utf-8"
@@ -167,25 +175,20 @@
                 if counter % 1000 == 0:
                     sum_time += int(time.time() - t)
                     print(
-                        "Uploaded",
-                        counter,
-                        "-",
-                        int(time.time() - t),
-                        "s",
-                        "sum time",
-                        sum_time,
-                        "s",
+                        f"Uploaded {counter}, {int(time.time()-t)}s, "
+                        f"sum time {sum_time}s",
                     )
                     t = time.time()
             except Exception as ex:
                 logging.critical("Failed to upload file, expcetion %s", ex)
             return f"{self.download_host}/{bucket_name}/{s3_path}"

-        p = Pool(256)
+        p = Pool(self.max_pool_size)
+        original_level = logging.root.level
         logging.basicConfig(level=logging.CRITICAL)
         result = sorted(_flatten_list(p.map(upload_task, all_files)))
-        logging.basicConfig(level=logging.INFO)
+        logging.basicConfig(level=original_level)
         return result

     def _upload_folder_to_s3(
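Two small fixes in this last hunk are easy to miss: the thread pool now reuses max_pool_size (100), presumably to stay aligned with the S3 client's max_pool_connections instead of the hard-coded 256, and the root log level is restored to whatever the caller had configured rather than being forced back to INFO. A standalone sketch of that save-and-restore pattern, assuming nothing beyond the standard library; setLevel is used here in place of basicConfig, since basicConfig is a no-op once handlers are already configured:

    import logging

    # Remember the caller's level, silence chatty per-file upload logging,
    # then restore instead of assuming the caller wanted INFO.
    original_level = logging.root.level
    logging.root.setLevel(logging.CRITICAL)
    try:
        pass  # ... the noisy parallel upload work would run here ...
    finally:
        logging.root.setLevel(original_level)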