Merge pull request #53213 from ClickHouse/revert-53210-revert-53100-upload-build-profile

Revert "Revert "Upload build time-trace data to CI database""
Commit 6e3100a79d by Alexey Milovidov, 2023-08-10 06:43:14 +03:00, committed by GitHub
9 changed files with 164 additions and 47 deletions
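Taken together, the reinstated change threads build profiling through the entire CI pipeline: four workflow files export ClickHouse Cloud credentials into the job environment, the packager gains a --with-profiler flag that sets -DENABLE_BUILD_PROFILING=1, build_check.py runs prepare-time-trace.sh over the build_docker output and inserts the collected events into a build_time_trace table, and clickhouse_helper.py gains a reusable insert_file method with retrying POSTs. The diff also carries smaller cleanups: type annotations and pathlib usage in the CI scripts, a bytes-vs-str comparison fix in the Docker image check, and a lambda-capture rename in the C++ glob reader.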


@@ -3,6 +3,9 @@ name: BackportPR
 env:
   # Force the stdout and stderr streams to be unbuffered
   PYTHONUNBUFFERED: 1
+  # Export system tables to ClickHouse Cloud
+  CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+  CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
 on: # yamllint disable-line rule:truthy
   push:


@@ -3,6 +3,9 @@ name: MasterCI
 env:
   # Force the stdout and stderr streams to be unbuffered
   PYTHONUNBUFFERED: 1
+  # Export system tables to ClickHouse Cloud
+  CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+  CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
 on: # yamllint disable-line rule:truthy
   push:


@@ -3,6 +3,9 @@ name: PullRequestCI
 env:
   # Force the stdout and stderr streams to be unbuffered
   PYTHONUNBUFFERED: 1
+  # Export system tables to ClickHouse Cloud
+  CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+  CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
 on: # yamllint disable-line rule:truthy
   pull_request:


@@ -3,6 +3,9 @@ name: ReleaseBranchCI
 env:
   # Force the stdout and stderr streams to be unbuffered
   PYTHONUNBUFFERED: 1
+  # Export system tables to ClickHouse Cloud
+  CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }}
+  CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }}
 on: # yamllint disable-line rule:truthy
   push:
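All four workflows export the same pair of secrets into the job environment; the build script further down reads them with os.getenv and simply skips the upload when the host is unset. A condensed sketch of that consumption pattern, mirroring the build_check.py hunk below:

import os

# An empty CLICKHOUSE_CI_LOGS_HOST disables the upload path entirely.
ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "")
if ci_logs_host:
    url = f"https://{ci_logs_host}/"
    auth = {
        "X-ClickHouse-User": "ci",
        "X-ClickHouse-Key": os.getenv("CLICKHOUSE_CI_LOGS_PASSWORD", ""),
    }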


@@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool:
         output = subprocess.check_output(
             f"docker images -q {image_name} 2> /dev/null", shell=True
         )
-        return output != ""
+        return output != b""
     except subprocess.CalledProcessError:
         return False
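The b"" change fixes a real bug: subprocess.check_output returns bytes, and in Python 3 bytes never compare equal to str, so the old comparison against "" was true even when no image was found. A minimal illustration, using POSIX true as a command that prints nothing:

import subprocess

# check_output returns bytes unless text=True or encoding=... is passed.
output = subprocess.check_output("true", shell=True)  # no stdout
print(output == b"")  # True: empty bytes
print(output != "")   # also True: bytes != str always, hence the old bug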
@@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None:
     )


-def pre_build(repo_path: Path, env_variables: List[str]):
+def pre_build(repo_path: Path, env_variables: List[str]) -> None:
     if "WITH_PERFORMANCE=1" in env_variables:
         current_branch = subprocess.check_output(
             "git branch --show-current", shell=True, encoding="utf-8"
@@ -81,8 +81,9 @@ def run_docker_image_with_env(
     env_variables: List[str],
     ch_root: Path,
     ccache_dir: Optional[Path],
-):
+) -> None:
+    output_dir.mkdir(parents=True, exist_ok=True)
     env_part = " -e ".join(env_variables)
     if env_part:
         env_part = " -e " + env_part
@@ -129,9 +130,10 @@ def parse_env_variables(
     version: str,
     official: bool,
     additional_pkgs: bool,
+    with_profiler: bool,
     with_coverage: bool,
     with_binaries: str,
-):
+) -> List[str]:
     DARWIN_SUFFIX = "-darwin"
     DARWIN_ARM_SUFFIX = "-darwin-aarch64"
     ARM_SUFFIX = "-aarch64"
@@ -322,6 +324,9 @@ def parse_env_variables(
         # utils are not included into clickhouse-bundle, so build everything
         build_target = "all"

+    if with_profiler:
+        cmake_flags.append("-DENABLE_BUILD_PROFILING=1")
+
     if with_coverage:
         cmake_flags.append("-DWITH_COVERAGE=1")
@@ -416,6 +421,7 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument("--version")
     parser.add_argument("--official", action="store_true")
     parser.add_argument("--additional-pkgs", action="store_true")
+    parser.add_argument("--with-profiler", action="store_true")
     parser.add_argument("--with-coverage", action="store_true")
     parser.add_argument(
         "--with-binaries", choices=("programs", "tests", ""), default=""
@@ -451,7 +457,7 @@ def parse_args() -> argparse.Namespace:
     return args


-def main():
+def main() -> None:
     logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
     args = parse_args()
@@ -479,6 +485,7 @@ def main():
         args.version,
         args.official,
         args.additional_pkgs,
+        args.with_profiler,
         args.with_coverage,
         args.with_binaries,
     )
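Background for the new flag: in ClickHouse's CMake, ENABLE_BUILD_PROFILING enables clang's -ftime-trace, which writes one Chrome-trace JSON per translation unit under CMakeFiles/<target>.dir/ (the cmake option name matches the flag added above; treat the exact wiring as an assumption here). A minimal sketch of reading the fields the CI pipeline extracts later; the sample path is hypothetical, and dur/args are absent on some event types, hence the .get calls:

import json
from pathlib import Path

# Hypothetical path to one -ftime-trace output file from the build.
trace_file = Path("build_docker/src/CMakeFiles/dbms.dir/Parsers/ParserCreateQuery.cpp.json")
trace = json.loads(trace_file.read_text())

for event in trace["traceEvents"]:
    # The same fields prepare-time-trace.sh pulls out with jq below.
    args = event.get("args", {})
    print(event["pid"], event["tid"], event["ph"], event.get("ts"),
          event.get("dur"), event["name"], args.get("detail"))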


@@ -396,9 +396,9 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
             throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
                 "Cannot compile regex from glob ({}): {}", current_path, matcher->error());

-            return reader->readFile([matcher = std::move(matcher)](const std::string & path)
+            return reader->readFile([my_matcher = std::move(matcher)](const std::string & path)
             {
-                return re2::RE2::FullMatch(path, *matcher);
+                return re2::RE2::FullMatch(path, *my_matcher);
             });
         }
         else


@@ -1,12 +1,13 @@
 #!/usr/bin/env python3

+from pathlib import Path
+from typing import List, Tuple
 import subprocess
 import logging
 import json
 import os
 import sys
 import time
-from typing import List, Tuple

 from ci_config import CI_CONFIG, BuildConfig
 from docker_pull_helper import get_image_with_version
@@ -18,18 +19,19 @@ from env_helper import (
     S3_DOWNLOAD,
     TEMP_PATH,
 )
+from git_helper import Git, git_runner
 from pr_info import PRInfo
 from s3_helper import S3Helper
 from tee_popen import TeePopen
 from version_helper import (
     ClickHouseVersion,
-    Git,
     get_version_from_repo,
     update_version_local,
 )
 from clickhouse_helper import (
     ClickHouseHelper,
     prepare_tests_results_for_clickhouse,
+    get_instance_type,
 )
 from stopwatch import Stopwatch
@@ -50,7 +52,7 @@ def _can_export_binaries(build_config: BuildConfig) -> bool:
 def get_packager_cmd(
     build_config: BuildConfig,
     packager_path: str,
-    output_path: str,
+    output_path: Path,
     build_version: str,
     image_version: str,
     official: bool,
@@ -59,8 +61,8 @@
     comp = build_config.compiler
     cmake_flags = "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=1"
     cmd = (
-        f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager --output-dir={output_path} "
-        f"--package-type={package_type} --compiler={comp}"
+        f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager "
+        f"--output-dir={output_path} --package-type={package_type} --compiler={comp}"
     )

     if build_config.debug_build:
@@ -78,6 +80,7 @@
         cmd += " --additional-pkgs"

     cmd += f" --docker-image-version={image_version}"
+    cmd += " --with-profiler"
     cmd += f" --version={build_version}"

     if _can_export_binaries(build_config):
@@ -90,13 +93,13 @@

 def build_clickhouse(
-    packager_cmd: str, logs_path: str, build_output_path: str
-) -> Tuple[str, bool]:
-    build_log_path = os.path.join(logs_path, BUILD_LOG_NAME)
+    packager_cmd: str, logs_path: Path, build_output_path: Path
+) -> Tuple[Path, bool]:
+    build_log_path = logs_path / BUILD_LOG_NAME
     success = False
     with TeePopen(packager_cmd, build_log_path) as process:
         retcode = process.wait()
-        if os.path.exists(build_output_path):
+        if build_output_path.exists():
             build_results = os.listdir(build_output_path)
         else:
             build_results = []
@@ -217,7 +220,7 @@
     pr_info: PRInfo,
     build_config: BuildConfig,
     s3_helper: S3Helper,
-    build_output_path: str,
+    build_output_path: Path,
 ) -> None:
     """Upload binary artifacts to a static S3 links"""
     static_binary_name = build_config.static_binary_name
@@ -229,7 +232,7 @@
         return

     s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse"))
-    binary = os.path.join(build_output_path, "clickhouse")
+    binary = build_output_path / "clickhouse"
     url = s3_helper.upload_build_file_to_s3(binary, s3_path)
     print(f"::notice ::Binary static URL: {url}")
@@ -242,8 +245,8 @@ def main():
     build_config = CI_CONFIG.build_config[build_name]

-    if not os.path.exists(TEMP_PATH):
-        os.makedirs(TEMP_PATH)
+    temp_path = Path(TEMP_PATH)
+    os.makedirs(temp_path, exist_ok=True)

     pr_info = PRInfo()
@@ -282,9 +285,8 @@
     logging.info("Build short name %s", build_name)

-    build_output_path = os.path.join(TEMP_PATH, build_name)
-    if not os.path.exists(build_output_path):
-        os.makedirs(build_output_path)
+    build_output_path = temp_path / build_name
+    os.makedirs(build_output_path, exist_ok=True)

     packager_cmd = get_packager_cmd(
         build_config,
@@ -297,9 +299,8 @@
     logging.info("Going to run packager with %s", packager_cmd)

-    logs_path = os.path.join(TEMP_PATH, "build_log")
-    if not os.path.exists(logs_path):
-        os.makedirs(logs_path)
+    logs_path = temp_path / "build_log"
+    os.makedirs(logs_path, exist_ok=True)

     start = time.time()
     log_path, success = build_clickhouse(packager_cmd, logs_path, build_output_path)
@@ -320,8 +321,8 @@
     # FIXME performance
     performance_urls = []
-    performance_path = os.path.join(build_output_path, "performance.tar.zst")
-    if os.path.exists(performance_path):
+    performance_path = build_output_path / "performance.tar.zst"
+    if performance_path.exists():
         performance_urls.append(
             s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path)
         )
@@ -344,9 +345,9 @@
     print("::notice ::Build URLs: {}".format("\n".join(build_urls)))

-    if os.path.exists(log_path):
+    if log_path.exists():
         log_url = s3_helper.upload_build_file_to_s3(
-            log_path, s3_path_prefix + "/" + os.path.basename(log_path)
+            log_path, s3_path_prefix + "/" + log_path.name
         )
         logging.info("Log url %s", log_url)
     else:
@@ -360,7 +361,79 @@

     upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path)

+    # Upload profile data
+    ch_helper = ClickHouseHelper()
+    clickhouse_ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "")
+    if clickhouse_ci_logs_host:
+        instance_type = get_instance_type()
+        query = f"""INSERT INTO build_time_trace
+(
+    pull_request_number,
+    commit_sha,
+    check_start_time,
+    check_name,
+    instance_type,
+    file,
+    library,
+    time,
+    pid,
+    tid,
+    ph,
+    ts,
+    dur,
+    cat,
+    name,
+    detail,
+    count,
+    avgMs,
+    args_name
+)
+SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', *
+FROM input('
+    file String,
+    library String,
+    time DateTime64(6),
+    pid UInt32,
+    tid UInt32,
+    ph String,
+    ts UInt64,
+    dur UInt64,
+    cat String,
+    name String,
+    detail String,
+    count UInt64,
+    avgMs UInt64,
+    args_name String')
+FORMAT JSONCompactEachRow"""
+
+        auth = {
+            "X-ClickHouse-User": "ci",
+            "X-ClickHouse-Key": os.getenv("CLICKHOUSE_CI_LOGS_PASSWORD", ""),
+        }
+        url = f"https://{clickhouse_ci_logs_host}/"
+        profiles_dir = temp_path / "profiles_source"
+        os.makedirs(profiles_dir, exist_ok=True)
+        logging.info(
+            f"Processing profile JSON files from {GIT_REPO_ROOT}/build_docker"
+        )
+        git_runner(
+            "./utils/prepare-time-trace/prepare-time-trace.sh "
+            f"build_docker {profiles_dir.absolute()}"
+        )
+        profile_data_file = temp_path / "profile.json"
+        with open(profile_data_file, "wb") as profile_fd:
+            for profile_source in os.listdir(profiles_dir):
+                with open(profiles_dir / profile_source, "rb") as ps_fd:
+                    profile_fd.write(ps_fd.read())
+        logging.info(
+            "::notice ::Uploading profile data, path: %s, size: %s, query: %s",
+            profile_data_file,
+            profile_data_file.stat().st_size,
+            query,
+        )
+        ch_helper.insert_file(url, auth, query, profile_data_file)
+
     # Upload statistics to CI database
     prepared_events = prepare_tests_results_for_clickhouse(
         pr_info,
         [],
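The query's FROM input(...) clause is what makes the schema split work: the request body carries only the per-event columns in JSONCompactEachRow, while the SELECT prepends the constant CI columns server-side, which is also why EXTRA_COLUMN_VALUES disappears from prepare-time-trace.sh below. A standalone sketch of the equivalent HTTP call, with a hypothetical host and an abbreviated column list (the constants are this PR's number and commit):

import requests  # the same client library clickhouse_helper.py uses

# Constants come from the SELECT; per-event columns from the body via input().
query = (
    "INSERT INTO build_time_trace (pull_request_number, commit_sha, file, name, dur) "
    "SELECT 53213, '6e3100a79d', * FROM input('file String, name String, dur UInt64') "
    "FORMAT JSONCompactEachRow"
)
auth = {"X-ClickHouse-User": "ci", "X-ClickHouse-Key": "<password>"}

with open("profile.json", "rb") as body:  # concatenated JSONCompactEachRow rows
    response = requests.post(
        "https://ci-logs.example.com/",  # hypothetical CLICKHOUSE_CI_LOGS_HOST
        params={"query": query},
        data=body,
        headers=auth,
    )
response.raise_for_status()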


@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-from typing import List
+from pathlib import Path
+from typing import Dict, List, Optional
 import json
 import logging
 import time
@@ -16,30 +17,60 @@ class InsertException(Exception):

 class ClickHouseHelper:
-    def __init__(self, url=None):
+    def __init__(
+        self, url: Optional[str] = None, auth: Optional[Dict[str, str]] = None
+    ):
         if url is None:
             url = get_parameter_from_ssm("clickhouse-test-stat-url")

         self.url = url
-        self.auth = {
+        self.auth = auth or {
             "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
             "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"),
         }

     @staticmethod
-    def _insert_json_str_info_impl(url, auth, db, table, json_str):
+    def insert_file(
+        url: str,
+        auth: Optional[Dict[str, str]],
+        query: str,
+        file: Path,
+        additional_options: Optional[Dict[str, str]] = None,
+    ) -> None:
+        params = {
+            "query": query,
+            "date_time_input_format": "best_effort",
+            "send_logs_level": "warning",
+        }
+        if additional_options:
+            for k, v in additional_options.items():
+                params[k] = v
+
+        with open(file, "rb") as data_fd:
+            ClickHouseHelper._insert_post(
+                url, params=params, data=data_fd, headers=auth
+            )
+
+    @staticmethod
+    def insert_json_str(url, auth, db, table, json_str):
         params = {
             "database": db,
             "query": f"INSERT INTO {table} FORMAT JSONEachRow",
             "date_time_input_format": "best_effort",
             "send_logs_level": "warning",
         }
+        ClickHouseHelper._insert_post(url, params=params, data=json_str, headers=auth)
+
+    @staticmethod
+    def _insert_post(*args, **kwargs):
+        url = ""
+        if args:
+            url = args[0]
+        url = kwargs.get("url", url)
+
         for i in range(5):
             try:
-                response = requests.post(
-                    url, params=params, data=json_str, headers=auth
-                )
+                response = requests.post(*args, **kwargs)
             except Exception as e:
                 error = f"Received exception while sending data to {url} on {i} attempt: {e}"
                 logging.warning(error)
@@ -51,13 +82,8 @@
                 break

             error = (
-                "Cannot insert data into clickhouse at try "
-                + str(i)
-                + ": HTTP code "
-                + str(response.status_code)
-                + ": '"
-                + str(response.text)
-                + "'"
+                f"Cannot insert data into clickhouse at try {i}: HTTP code "
+                f"{response.status_code}: '{response.text}'"
             )

             if response.status_code >= 500:
@@ -76,7 +102,7 @@
         raise InsertException(error)

     def _insert_json_str_info(self, db, table, json_str):
-        self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str)
+        self.insert_json_str(self.url, self.auth, db, table, json_str)

     def insert_event_into(self, db, table, event, safe=True):
         event_str = json.dumps(event)
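After this refactoring the helper has three entry points: insert_file streams a file body with an explicit INSERT query, insert_json_str keeps the old per-table JSONEachRow path, and both delegate to _insert_post, which retries up to five times. A hedged usage sketch; the endpoint and password are placeholders:

from pathlib import Path

from clickhouse_helper import ClickHouseHelper

# Static form, as build_check.py uses it: explicit URL and headers, no SSM lookups.
ClickHouseHelper.insert_file(
    url="https://ci-logs.example.com/",  # hypothetical endpoint
    auth={"X-ClickHouse-User": "ci", "X-ClickHouse-Key": "<password>"},
    query="INSERT INTO build_time_trace FORMAT JSONCompactEachRow",
    file=Path("profile.json"),
)

# Instance form: passing auth skips the SSM parameter fetches in __init__.
helper = ClickHouseHelper(
    url="https://ci-logs.example.com/",
    auth={"X-ClickHouse-User": "ci", "X-ClickHouse-Key": "<password>"},
)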


@@ -35,7 +35,6 @@ ENGINE = MergeTree ORDER BY (date, file, name, args_name);

 INPUT_DIR=$1
 OUTPUT_DIR=$2
-EXTRA_COLUMN_VALUES=$3

 find "$INPUT_DIR" -name '*.json' | grep -P '\.(c|cpp|cc|cxx)\.json$' | xargs -P $(nproc) -I{} bash -c "
@@ -43,7 +42,7 @@ find "$INPUT_DIR" -name '*.json' | grep -P '\.(c|cpp|cc|cxx)\.json$' | xargs -P
     LIBRARY_NAME=\$(echo '{}' | sed -r -e 's!^.*/CMakeFiles/([^/]+)\.dir/.*\$!\1!')
     START_TIME=\$(jq '.beginningOfTime' '{}')

-    jq -c '.traceEvents[] | [${EXTRA_COLUMN_VALUES} \"'\"\$ORIGINAL_FILENAME\"'\", \"'\"\$LIBRARY_NAME\"'\", '\$START_TIME', .pid, .tid, .ph, .ts, .dur, .cat, .name, .args.detail, .args.count, .args[\"avg ms\"], .args.name]' '{}' > \"${OUTPUT_DIR}/\$\$\"
+    jq -c '.traceEvents[] | [\"'\"\$ORIGINAL_FILENAME\"'\", \"'\"\$LIBRARY_NAME\"'\", '\$START_TIME', .pid, .tid, .ph, .ts, .dur, .cat, .name, .args.detail, .args.count, .args[\"avg ms\"], .args.name]' '{}' > \"${OUTPUT_DIR}/\$\$\"
 "

 # Now you can upload it as follows: