Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-15 12:14:18 +00:00)

Commit b24239a75c: "Merge branch 'master' into fix-keeper-with-xid-64"
@@ -369,11 +369,15 @@ namespace PackedZeroTraits
 {
     template <typename Second, template <typename, typename> class PackedPairNoInit>
     inline bool check(const PackedPairNoInit<StringRef, Second> p)
-    { return 0 == p.key.size; }
+    {
+        return 0 == p.key.size;
+    }

     template <typename Second, template <typename, typename> class PackedPairNoInit>
     inline void set(PackedPairNoInit<StringRef, Second> & p)
-    { p.key.size = 0; }
+    {
+        p.key.size = 0;
+    }
 }

@@ -952,6 +952,8 @@ private:
     static std::pair<LoggerMapIterator, bool> add(Logger * pLogger);
     static std::optional<LoggerMapIterator> find(const std::string & name);
     static Logger * findRawPtr(const std::string & name);
+    void unsafeSetChannel(Channel * pChannel);
+    Channel* unsafeGetChannel() const;

     Logger();
     Logger(const Logger &);
@@ -61,6 +61,13 @@ Logger::~Logger()


 void Logger::setChannel(Channel* pChannel)
 {
+    std::lock_guard<std::mutex> lock(getLoggerMutex());
+    unsafeSetChannel(pChannel);
+}
+
+
+void Logger::unsafeSetChannel(Channel* pChannel)
+{
     if (_pChannel) _pChannel->release();
     _pChannel = pChannel;
@@ -69,6 +76,14 @@ void Logger::setChannel(Channel* pChannel)


 Channel* Logger::getChannel() const
 {
+    std::lock_guard<std::mutex> lock(getLoggerMutex());
+
+    return unsafeGetChannel();
+}
+
+
+Channel* Logger::unsafeGetChannel() const
+{
     return _pChannel;
 }
@@ -89,7 +104,7 @@ void Logger::setLevel(const std::string& level)
 void Logger::setProperty(const std::string& name, const std::string& value)
 {
     if (name == "channel")
-        setChannel(LoggingRegistry::defaultRegistry().channelForName(value));
+        unsafeSetChannel(LoggingRegistry::defaultRegistry().channelForName(value));
     else if (name == "level")
         setLevel(value);
     else
@@ -160,7 +175,7 @@ void Logger::setChannel(const std::string& name, Channel* pChannel)
         if (len == 0 ||
             (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.')))
         {
-            it.second.logger->setChannel(pChannel);
+            it.second.logger->unsafeSetChannel(pChannel);
         }
     }
 }
@@ -393,7 +408,7 @@ std::pair<Logger::LoggerMapIterator, bool> Logger::unsafeGet(const std::string&
     else
     {
         Logger& par = parent(name);
-        logger = new Logger(name, par.getChannel(), par.getLevel());
+        logger = new Logger(name, par.unsafeGetChannel(), par.getLevel());
     }

     return add(logger);
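All of the Logger hunks follow one pattern: the public accessor keeps taking `getLoggerMutex()` and delegates to a new `unsafe*` variant that carries the actual logic and expects the mutex to be held already, while call sites that apparently already run under the mutex (`setProperty`, the bulk `setChannel(name, pChannel)` loop, `unsafeGet`) switch to the `unsafe*` variant so the non-recursive mutex is never acquired twice. A minimal Python sketch of the same lock-then-delegate pattern (names are illustrative, not the Poco API):

```python
import threading

_logger_mutex = threading.Lock()  # analogue of getLoggerMutex(); not reentrant


class Logger:
    def __init__(self):
        self._channel = None

    # Public API: acquire the mutex, then delegate to the unsafe variant.
    def set_channel(self, channel):
        with _logger_mutex:
            self._unsafe_set_channel(channel)

    def get_channel(self):
        with _logger_mutex:
            return self._unsafe_get_channel()

    # "unsafe" variants: the caller must already hold _logger_mutex.
    def _unsafe_set_channel(self, channel):
        self._channel = channel

    def _unsafe_get_channel(self):
        return self._channel

    # Runs entirely under the mutex, so it must use the unsafe variants;
    # calling set_channel() here would deadlock on the non-reentrant lock.
    def set_property(self, name, value):
        with _logger_mutex:
            if name == "channel":
                self._unsafe_set_channel(value)
```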
ci_v2/docker/fasttest/Dockerfile (new file, 105 lines)
@@ -0,0 +1,105 @@
# docker build -t clickhouse/fasttest .
FROM ubuntu:22.04

# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list

ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18

RUN apt-get update \
    && apt-get install \
        apt-transport-https \
        apt-utils \
        ca-certificates \
        curl \
        gnupg \
        lsb-release \
        wget \
        git \
        --yes --no-install-recommends --verbose-versions \
    && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \
    && wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \
    && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
    && apt-key add /tmp/llvm-snapshot.gpg.key \
    && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
        /etc/apt/sources.list \
    && apt-get update \
    && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

# moreutils - provides ts for FT
# expect, bzip2 - required by FT
# bsdmainutils - provides hexdump for FT

RUN apt-get update \
    && apt-get install \
        clang-${LLVM_VERSION} \
        cmake \
        libclang-${LLVM_VERSION}-dev \
        libclang-rt-${LLVM_VERSION}-dev \
        lld-${LLVM_VERSION} \
        llvm-${LLVM_VERSION}-dev \
        lsof \
        ninja-build \
        python3 \
        python3-pip \
        zstd \
        moreutils \
        expect \
        bsdmainutils \
        pv \
        jq \
        bzip2 \
        --yes --no-install-recommends \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb
# Give suid to gdb to grant it attach permissions
RUN chmod u+s /opt/gdb/bin/gdb
ENV PATH="/opt/gdb/bin:${PATH}"

# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# FIXME: workaround for "The imported target "merge-fdata" references the file" error
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake

# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up libraries from the default install path.
# It's a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use that. Details: https://github.com/llvm/llvm-project/issues/95792
RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu

ARG TARGETARCH
ARG SCCACHE_VERSION=v0.7.7
ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1
# sccache requires a value for the region, so we set a default one
ENV SCCACHE_REGION=us-east-1
RUN arch=${TARGETARCH} \
    && case $arch in \
        amd64) rarch=x86_64 ;; \
        arm64) rarch=aarch64 ;; \
    esac \
    && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \
        tar xz -C /tmp \
    && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \
    && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r

COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt

# chmod 777 to make the container user independent
RUN mkdir -p /var/lib/clickhouse \
    && chmod 777 /var/lib/clickhouse

ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

RUN groupadd --system --gid 1000 clickhouse \
    && useradd --system --gid 1000 --uid 1000 -m clickhouse \
    && mkdir -p /.cache/sccache && chmod 777 /.cache/sccache

ENV PYTHONPATH="/wd"
ENV PYTHONUNBUFFERED=1
ci_v2/docker/fasttest/requirements.txt (new file, 6 lines)
@@ -0,0 +1,6 @@
Jinja2==3.1.3
numpy==1.26.4
requests==2.32.3
pandas==1.5.3
scipy==1.12.0
#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl
@@ -1,4 +1,5 @@
 requests==2.32.3
 yamllint==1.26.3
 codespell==2.2.1
-https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl
+#use praktika from CH repo
+#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl
@@ -2,7 +2,6 @@ import math
 import multiprocessing
 import os
 import re
-import sys
 from concurrent.futures import ProcessPoolExecutor
 from pathlib import Path

@@ -51,25 +50,6 @@ def run_check_concurrent(check_name, check_function, files, nproc=NPROC):
     return result


-def run_simple_check(check_name, check_function, **kwargs):
-    stop_watch = Utils.Stopwatch()
-
-    error = check_function(**kwargs)
-
-    result = Result(
-        name=check_name,
-        status=Result.Status.SUCCESS if not error else Result.Status.FAILED,
-        start_time=stop_watch.start_time,
-        duration=stop_watch.duration,
-        info=error,
-    )
-    return result
-
-
-def run_check(check_name, check_function, files):
-    return run_check_concurrent(check_name, check_function, files, nproc=1)
-
-
 def check_duplicate_includes(file_path):
     includes = []
     with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
@@ -117,7 +97,7 @@ def check_xmllint(file_paths):
 def check_functional_test_cases(files):
     """
     Queries with event_date should have yesterday() not today()
-    NOTE: it is not that accuate, but at least something.
+    NOTE: it is not that accurate, but at least something.
     """

     patterns = [
@@ -345,66 +325,58 @@ if __name__ == "__main__":
         )
     )
     results.append(
-        run_check(
-            check_name="Check Tests Numbers",
-            check_function=check_gaps_in_tests_numbers,
-            files=functional_test_files,
+        Result.create_from_command_execution(
+            name="Check Tests Numbers",
+            command=check_gaps_in_tests_numbers,
+            command_args=[functional_test_files],
         )
     )
     results.append(
-        run_simple_check(
-            check_name="Check Broken Symlinks",
-            check_function=check_broken_links,
-            path="./",
-            exclude_paths=["contrib/", "metadata/", "programs/server/data"],
+        Result.create_from_command_execution(
+            name="Check Broken Symlinks",
+            command=check_broken_links,
+            command_kwargs={
+                "path": "./",
+                "exclude_paths": ["contrib/", "metadata/", "programs/server/data"],
+            },
        )
    )
    results.append(
-        run_simple_check(
-            check_name="Check CPP code",
-            check_function=check_cpp_code,
+        Result.create_from_command_execution(
+            name="Check CPP code",
+            command=check_cpp_code,
        )
    )
    results.append(
-        run_simple_check(
-            check_name="Check Submodules",
-            check_function=check_repo_submodules,
+        Result.create_from_command_execution(
+            name="Check Submodules",
+            command=check_repo_submodules,
        )
    )
    results.append(
-        run_check(
-            check_name="Check File Names",
-            check_function=check_file_names,
-            files=all_files,
+        Result.create_from_command_execution(
+            name="Check File Names",
+            command=check_file_names,
+            command_args=[all_files],
        )
    )
    results.append(
-        run_simple_check(
-            check_name="Check Many Different Things",
-            check_function=check_other,
+        Result.create_from_command_execution(
+            name="Check Many Different Things",
+            command=check_other,
        )
    )
    results.append(
-        run_simple_check(
-            check_name="Check Codespell",
-            check_function=check_codespell,
+        Result.create_from_command_execution(
+            name="Check Codespell",
+            command=check_codespell,
        )
    )
    results.append(
-        run_simple_check(
-            check_name="Check Aspell",
-            check_function=check_aspell,
+        Result.create_from_command_execution(
+            name="Check Aspell",
+            command=check_aspell,
        )
    )

-    res = Result.create_from(results=results, stopwatch=stop_watch).dump()
-
-    if not res.is_ok():
-        print("Style check: failed")
-        for result in results:
-            if not result.is_ok():
-                print("Failed check:")
-                print(" | ", result)
-        sys.exit(1)
-    else:
-        print("Style check: ok")
+    Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()
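The migration above replaces the local `run_check`/`run_simple_check` helpers with `Result.create_from_command_execution` from praktika. As a reference for the calling convention, here is a rough, self-contained sketch; the check function and the file list are made up, and only the keyword names `name`, `command`, `command_args` and `command_kwargs` are taken from the calls shown in the diff:

```python
from praktika.result import Result


def check_no_temp_files(files):
    # Hypothetical check: like the real check_* helpers, it returns a falsy
    # value on success and an error description string on failure.
    bad = [f for f in files if f.endswith(".tmp")]
    return f"temporary files found: {bad}" if bad else ""


results = []
results.append(
    Result.create_from_command_execution(
        name="Check No Temp Files",
        command=check_no_temp_files,
        command_args=[["a.cpp", "b.tmp"]],  # positional args (cf. files=... above)
        # keyword args would go into command_kwargs={...}, as in "Check Broken Symlinks"
    )
)
```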
ci_v2/jobs/fast_test.py (new file, 329 lines)
@@ -0,0 +1,329 @@
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
from ci_v2.jobs.scripts.functional_tests_results import FTResultsProcessor
|
||||
from praktika.environment import Environment
|
||||
from praktika.result import Result
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import MetaClasses, Shell, Utils
|
||||
|
||||
|
||||
class ClickHouseProc:
|
||||
def __init__(self):
|
||||
self.ch_config_dir = f"{Settings.TEMP_DIR}/etc/clickhouse-server"
|
||||
self.pid_file = f"{self.ch_config_dir}/clickhouse-server.pid"
|
||||
self.config_file = f"{self.ch_config_dir}/config.xml"
|
||||
self.user_files_path = f"{self.ch_config_dir}/user_files"
|
||||
self.test_output_file = f"{Settings.OUTPUT_DIR}/test_result.txt"
|
||||
self.command = f"clickhouse-server --config-file {self.config_file} --pid-file {self.pid_file} -- --path {self.ch_config_dir} --user_files_path {self.user_files_path} --top_level_domains_path {self.ch_config_dir}/top_level_domains --keeper_server.storage_path {self.ch_config_dir}/coordination"
|
||||
self.proc = None
|
||||
self.pid = 0
|
||||
nproc = int(Utils.cpu_count() / 2)
|
||||
self.fast_test_command = f"clickhouse-test --hung-check --fast-tests-only --no-random-settings --no-random-merge-tree-settings --no-long --testname --shard --zookeeper --check-zookeeper-session --order random --print-time --report-logs-stats --jobs {nproc} -- '' | ts '%Y-%m-%d %H:%M:%S' \
|
||||
| tee -a \"{self.test_output_file}\""
|
||||
# TODO: store info in case of failure
|
||||
self.info = ""
|
||||
self.info_file = ""
|
||||
|
||||
Utils.set_env("CLICKHOUSE_CONFIG_DIR", self.ch_config_dir)
|
||||
Utils.set_env("CLICKHOUSE_CONFIG", self.config_file)
|
||||
Utils.set_env("CLICKHOUSE_USER_FILES", self.user_files_path)
|
||||
Utils.set_env("CLICKHOUSE_SCHEMA_FILES", f"{self.ch_config_dir}/format_schemas")
|
||||
|
||||
def start(self):
|
||||
print("Starting ClickHouse server")
|
||||
Shell.check(f"rm {self.pid_file}")
|
||||
|
||||
def run_clickhouse():
|
||||
self.proc = Shell.run_async(
|
||||
self.command, verbose=True, suppress_output=True
|
||||
)
|
||||
|
||||
thread = threading.Thread(target=run_clickhouse)
|
||||
thread.daemon = True # Allow program to exit even if thread is still running
|
||||
thread.start()
|
||||
|
||||
# self.proc = Shell.run_async(self.command, verbose=True)
|
||||
|
||||
started = False
|
||||
try:
|
||||
for _ in range(5):
|
||||
pid = Shell.get_output(f"cat {self.pid_file}").strip()
|
||||
if not pid:
|
||||
Utils.sleep(1)
|
||||
continue
|
||||
started = True
|
||||
print(f"Got pid from fs [{pid}]")
|
||||
_ = int(pid)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not started:
|
||||
stdout = self.proc.stdout.read().strip() if self.proc.stdout else ""
|
||||
stderr = self.proc.stderr.read().strip() if self.proc.stderr else ""
|
||||
Utils.print_formatted_error("Failed to start ClickHouse", stdout, stderr)
|
||||
return False
|
||||
|
||||
print(f"ClickHouse server started successfully, pid [{pid}]")
|
||||
return True
|
||||
|
||||
def wait_ready(self):
|
||||
res, out, err = 0, "", ""
|
||||
attempts = 30
|
||||
delay = 2
|
||||
for attempt in range(attempts):
|
||||
res, out, err = Shell.get_res_stdout_stderr(
|
||||
'clickhouse-client --query "select 1"', verbose=True
|
||||
)
|
||||
if out.strip() == "1":
|
||||
print("Server ready")
|
||||
break
|
||||
else:
|
||||
print(f"Server not ready, wait")
|
||||
Utils.sleep(delay)
|
||||
else:
|
||||
Utils.print_formatted_error(
|
||||
f"Server not ready after [{attempts*delay}s]", out, err
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
def run_fast_test(self):
|
||||
if Path(self.test_output_file).exists():
|
||||
Path(self.test_output_file).unlink()
|
||||
exit_code = Shell.run(self.fast_test_command)
|
||||
return exit_code == 0
|
||||
|
||||
def terminate(self):
|
||||
print("Terminate ClickHouse process")
|
||||
timeout = 10
|
||||
if self.proc:
|
||||
Utils.terminate_process_group(self.proc.pid)
|
||||
|
||||
self.proc.terminate()
|
||||
try:
|
||||
self.proc.wait(timeout=10)
|
||||
print(f"Process {self.proc.pid} terminated gracefully.")
|
||||
except Exception:
|
||||
print(
|
||||
f"Process {self.proc.pid} did not terminate in {timeout} seconds, killing it..."
|
||||
)
|
||||
Utils.terminate_process_group(self.proc.pid, force=True)
|
||||
self.proc.wait() # Wait for the process to be fully killed
|
||||
print(f"Process {self.proc} was killed.")
|
||||
|
||||
|
||||
def clone_submodules():
|
||||
submodules_to_update = [
|
||||
"contrib/sysroot",
|
||||
"contrib/magic_enum",
|
||||
"contrib/abseil-cpp",
|
||||
"contrib/boost",
|
||||
"contrib/zlib-ng",
|
||||
"contrib/libxml2",
|
||||
"contrib/libunwind",
|
||||
"contrib/fmtlib",
|
||||
"contrib/aklomp-base64",
|
||||
"contrib/cctz",
|
||||
"contrib/libcpuid",
|
||||
"contrib/libdivide",
|
||||
"contrib/double-conversion",
|
||||
"contrib/llvm-project",
|
||||
"contrib/lz4",
|
||||
"contrib/zstd",
|
||||
"contrib/fastops",
|
||||
"contrib/rapidjson",
|
||||
"contrib/re2",
|
||||
"contrib/sparsehash-c11",
|
||||
"contrib/croaring",
|
||||
"contrib/miniselect",
|
||||
"contrib/xz",
|
||||
"contrib/dragonbox",
|
||||
"contrib/fast_float",
|
||||
"contrib/NuRaft",
|
||||
"contrib/jemalloc",
|
||||
"contrib/replxx",
|
||||
"contrib/wyhash",
|
||||
"contrib/c-ares",
|
||||
"contrib/morton-nd",
|
||||
"contrib/xxHash",
|
||||
"contrib/expected",
|
||||
"contrib/simdjson",
|
||||
"contrib/liburing",
|
||||
"contrib/libfiu",
|
||||
"contrib/incbin",
|
||||
"contrib/yaml-cpp",
|
||||
]
|
||||
|
||||
res = Shell.check("git submodule sync", verbose=True, strict=True)
|
||||
res = res and Shell.check("git submodule init", verbose=True, strict=True)
|
||||
res = res and Shell.check(
|
||||
command=f"xargs --max-procs={min([Utils.cpu_count(), 20])} --null --no-run-if-empty --max-args=1 git submodule update --depth 1 --single-branch",
|
||||
stdin_str="\0".join(submodules_to_update) + "\0",
|
||||
timeout=120,
|
||||
retries=3,
|
||||
verbose=True,
|
||||
)
|
||||
res = res and Shell.check("git submodule foreach git reset --hard", verbose=True)
|
||||
res = res and Shell.check("git submodule foreach git checkout @ -f", verbose=True)
|
||||
res = res and Shell.check("git submodule foreach git clean -xfd", verbose=True)
|
||||
return res
|
||||
|
||||
|
||||
def update_path_ch_config(config_file_path=""):
|
||||
print("Updating path in clickhouse config")
|
||||
config_file_path = (
|
||||
config_file_path or f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.xml"
|
||||
)
|
||||
ssl_config_file_path = (
|
||||
f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.d/ssl_certs.xml"
|
||||
)
|
||||
try:
|
||||
with open(config_file_path, "r", encoding="utf-8") as file:
|
||||
content = file.read()
|
||||
|
||||
with open(ssl_config_file_path, "r", encoding="utf-8") as file:
|
||||
ssl_config_content = file.read()
|
||||
content = content.replace(">/var/", f">{Settings.TEMP_DIR}/var/")
|
||||
content = content.replace(">/etc/", f">{Settings.TEMP_DIR}/etc/")
|
||||
ssl_config_content = ssl_config_content.replace(
|
||||
">/etc/", f">{Settings.TEMP_DIR}/etc/"
|
||||
)
|
||||
with open(config_file_path, "w", encoding="utf-8") as file:
|
||||
file.write(content)
|
||||
with open(ssl_config_file_path, "w", encoding="utf-8") as file:
|
||||
file.write(ssl_config_content)
|
||||
except Exception as e:
|
||||
print(f"ERROR: failed to update config, exception: {e}")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class JobStages(metaclass=MetaClasses.WithIter):
|
||||
CHECKOUT_SUBMODULES = "checkout"
|
||||
CMAKE = "cmake"
|
||||
BUILD = "build"
|
||||
CONFIG = "config"
|
||||
TEST = "test"
|
||||
|
||||
|
||||
def main():
|
||||
stop_watch = Utils.Stopwatch()
|
||||
|
||||
stages = list(JobStages)
|
||||
stage = Environment.LOCAL_RUN_PARAM or JobStages.CHECKOUT_SUBMODULES
|
||||
if stage:
|
||||
assert stage in JobStages, f"--param must be one of [{list(JobStages)}]"
|
||||
print(f"Job will start from stage [{stage}]")
|
||||
while stage in stages:
|
||||
stages.pop(0)
|
||||
stages.insert(0, stage)
|
||||
|
||||
current_directory = Utils.cwd()
|
||||
build_dir = f"{Settings.TEMP_DIR}/build"
|
||||
|
||||
Utils.add_to_PATH(f"{build_dir}/programs:{current_directory}/tests")
|
||||
|
||||
res = True
|
||||
results = []
|
||||
|
||||
if res and JobStages.CHECKOUT_SUBMODULES in stages:
|
||||
Shell.check(f"rm -rf {build_dir} && mkdir -p {build_dir}")
|
||||
results.append(
|
||||
Result.create_from_command_execution(
|
||||
name="Checkout Submodules for Minimal Build",
|
||||
command=clone_submodules,
|
||||
)
|
||||
)
|
||||
res = results[-1].is_ok()
|
||||
|
||||
if res and JobStages.CMAKE in stages:
|
||||
results.append(
|
||||
Result.create_from_command_execution(
|
||||
name="Cmake configuration",
|
||||
command=f"cmake {current_directory} -DCMAKE_CXX_COMPILER=clang++-18 -DCMAKE_C_COMPILER=clang-18 \
|
||||
-DCMAKE_TOOLCHAIN_FILE={current_directory}/cmake/linux/toolchain-x86_64-musl.cmake -DENABLE_LIBRARIES=0 \
|
||||
-DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 -DENABLE_NURAFT=1 -DENABLE_SIMDJSON=1 \
|
||||
-DENABLE_JEMALLOC=1 -DENABLE_LIBURING=1 -DENABLE_YAML_CPP=1 -DCOMPILER_CACHE=sccache",
|
||||
workdir=build_dir,
|
||||
with_log=True,
|
||||
)
|
||||
)
|
||||
res = results[-1].is_ok()
|
||||
|
||||
if res and JobStages.BUILD in stages:
|
||||
Shell.check("sccache --show-stats")
|
||||
results.append(
|
||||
Result.create_from_command_execution(
|
||||
name="Build ClickHouse",
|
||||
command="ninja clickhouse-bundle clickhouse-stripped",
|
||||
workdir=build_dir,
|
||||
with_log=True,
|
||||
)
|
||||
)
|
||||
Shell.check("sccache --show-stats")
|
||||
res = results[-1].is_ok()
|
||||
|
||||
if res and JobStages.BUILD in stages:
|
||||
commands = [
|
||||
f"mkdir -p {Settings.OUTPUT_DIR}/binaries",
|
||||
f"cp ./programs/clickhouse {Settings.OUTPUT_DIR}/binaries/clickhouse",
|
||||
f"zstd --threads=0 --force programs/clickhouse-stripped -o {Settings.OUTPUT_DIR}/binaries/clickhouse-stripped.zst",
|
||||
"sccache --show-stats",
|
||||
"clickhouse-client --version",
|
||||
"clickhouse-test --help",
|
||||
]
|
||||
results.append(
|
||||
Result.create_from_command_execution(
|
||||
name="Check and Compress binary",
|
||||
command=commands,
|
||||
workdir=build_dir,
|
||||
with_log=True,
|
||||
)
|
||||
)
|
||||
res = results[-1].is_ok()
|
||||
|
||||
if res and JobStages.CONFIG in stages:
|
||||
commands = [
|
||||
f"rm -rf {Settings.TEMP_DIR}/etc/ && mkdir -p {Settings.TEMP_DIR}/etc/clickhouse-client {Settings.TEMP_DIR}/etc/clickhouse-server",
|
||||
f"cp {current_directory}/programs/server/config.xml {current_directory}/programs/server/users.xml {Settings.TEMP_DIR}/etc/clickhouse-server/",
|
||||
f"{current_directory}/tests/config/install.sh {Settings.TEMP_DIR}/etc/clickhouse-server {Settings.TEMP_DIR}/etc/clickhouse-client",
|
||||
# f"cp -a {current_directory}/programs/server/config.d/log_to_console.xml {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/",
|
||||
f"rm -f {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/secure_ports.xml",
|
||||
update_path_ch_config,
|
||||
]
|
||||
results.append(
|
||||
Result.create_from_command_execution(
|
||||
name="Install ClickHouse Config",
|
||||
command=commands,
|
||||
with_log=True,
|
||||
)
|
||||
)
|
||||
res = results[-1].is_ok()
|
||||
|
||||
CH = ClickHouseProc()
|
||||
if res and JobStages.TEST in stages:
|
||||
stop_watch_ = Utils.Stopwatch()
|
||||
step_name = "Start ClickHouse Server"
|
||||
print(step_name)
|
||||
res = CH.start()
|
||||
res = res and CH.wait_ready()
|
||||
results.append(
|
||||
Result.create_from(name=step_name, status=res, stopwatch=stop_watch_)
|
||||
)
|
||||
|
||||
if res and JobStages.TEST in stages:
|
||||
step_name = "Tests"
|
||||
print(step_name)
|
||||
res = res and CH.run_fast_test()
|
||||
if res:
|
||||
results.append(FTResultsProcessor(wd=Settings.OUTPUT_DIR).run())
|
||||
|
||||
CH.terminate()
|
||||
|
||||
Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -14,7 +14,8 @@
|
||||
|
||||
LC_ALL="en_US.UTF-8"
|
||||
ROOT_PATH="."
|
||||
EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'
|
||||
EXCLUDE='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml'
|
||||
EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettingsDeclaration\.h'
|
||||
|
||||
# From [1]:
|
||||
# But since array_to_string_internal() in array.c still loops over array
|
||||
@ -31,7 +32,8 @@ function in_array()
|
||||
}
|
||||
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
grep -vP $EXCLUDE_DOCS |
|
||||
xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' |
|
||||
# a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... before opening brace | whitespaces inside braces
|
||||
grep -v -P '(//|:\s+\*|\$\(\()| \)"'
|
||||
@ -39,12 +41,12 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/n
|
||||
|
||||
# Tabs
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
xargs grep $@ -F $'\t'
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep $@ -F $'\t' && echo '^ tabs are not allowed'
|
||||
|
||||
# // namespace comments are unneeded
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep $@ -P '}\s*//+\s*namespace\s*'
|
||||
|
||||
# Broken symlinks
|
||||
@ -52,26 +54,26 @@ find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symli
|
||||
|
||||
# Duplicated or incorrect setting declarations
|
||||
SETTINGS_FILE=$(mktemp)
|
||||
cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " " substr($1, 3, length($1) - 3) " SettingsDeclaration" }' > ${SETTINGS_FILE}
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep "extern const Settings" -T | awk '{print substr($5, 0, length($5) -1) " " substr($4, 9) " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE}
|
||||
ALL_DECLARATION_FILES="
|
||||
$ROOT_PATH/src/Core/Settings.cpp
|
||||
$ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp
|
||||
$ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h"
|
||||
|
||||
# Duplicate extern declarations for settings
|
||||
awk '{if (seen[$0]++) print $3 " -> " $1 ;}' ${SETTINGS_FILE} | while read line;
|
||||
cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " Settings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq > ${SETTINGS_FILE}
|
||||
cat $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " MergeTreeSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE}
|
||||
|
||||
# Check that if there are duplicated settings (declared in different objects) they all have the same type (it's simpler to validate style with that assert)
|
||||
for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sed -e 's/MergeTreeSettings//g' -e 's/Settings//g' | sort | uniq | awk '{ print $1 }' | uniq -d);
|
||||
do
|
||||
echo "Found duplicated setting declaration in: $line"
|
||||
echo "# Found multiple definitions of setting ${setting} with different types: "
|
||||
grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > " $0 }'
|
||||
done
|
||||
|
||||
# Incorrect declarations for settings
|
||||
for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sort | uniq | awk '{ print $1 }' | sort | uniq -d);
|
||||
do
|
||||
expected=$(grep "^$setting " ${SETTINGS_FILE} | grep SettingsDeclaration | awk '{ print $2 }')
|
||||
grep "^$setting " ${SETTINGS_FILE} | grep -v " $expected" | awk '{ print $3 " found setting " $1 " with type " $2 }' | while read line;
|
||||
do
|
||||
echo "In $line but it should be $expected"
|
||||
done
|
||||
done
|
||||
# We append all uses of extern found in implementation files to validate them in a single pass and avoid reading the same files over and over
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -e "^\s*extern const Settings" -e "^\s**extern const MergeTreeSettings" -T | awk '{print substr($5, 0, length($5) -1) " " $4 " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE}
|
||||
|
||||
rm ${SETTINGS_FILE}
|
||||
# Duplicated or incorrect setting declarations
|
||||
bash $ROOT_PATH/utils/check-style/check-settings-style
|
||||
|
||||
# Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics
|
||||
declare -A EXTERN_TYPES
|
||||
@ -91,12 +93,14 @@ EXTERN_TYPES_EXCLUDES=(
|
||||
ProfileEvents::Timer
|
||||
ProfileEvents::Type
|
||||
ProfileEvents::TypeEnum
|
||||
ProfileEvents::ValueType
|
||||
ProfileEvents::dumpToMapColumn
|
||||
ProfileEvents::getProfileEvents
|
||||
ProfileEvents::ThreadIdToCountersSnapshot
|
||||
ProfileEvents::LOCAL_NAME
|
||||
ProfileEvents::keeper_profile_events
|
||||
ProfileEvents::CountersIncrement
|
||||
ProfileEvents::size
|
||||
|
||||
CurrentMetrics::add
|
||||
CurrentMetrics::sub
|
||||
@ -108,6 +112,7 @@ EXTERN_TYPES_EXCLUDES=(
|
||||
CurrentMetrics::values
|
||||
CurrentMetrics::Value
|
||||
CurrentMetrics::keeper_metrics
|
||||
CurrentMetrics::size
|
||||
|
||||
ErrorCodes::ErrorCode
|
||||
ErrorCodes::getName
|
||||
@ -130,7 +135,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do
|
||||
# and this matches with zkutil::CreateMode
|
||||
grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp'
|
||||
} | {
|
||||
grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
|
||||
grep -vP $EXCLUDE | xargs grep -l -P "extern const $type_of_extern $allowed_chars"
|
||||
} | while read file; do
|
||||
grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do
|
||||
if ! grep -q "$extern_type::$val" $file; then
|
||||
@ -148,7 +153,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do
|
||||
# sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \
|
||||
# awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file )
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
|
||||
grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
|
||||
grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars"
|
||||
} | while read file; do
|
||||
grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do
|
||||
if ! grep -q "extern const $type_of_extern $val" $file; then
|
||||
@ -161,7 +166,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do
|
||||
|
||||
# Duplicates
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | {
|
||||
grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars"
|
||||
grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars"
|
||||
} | while read file; do
|
||||
grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file"
|
||||
done
|
||||
@ -169,32 +174,32 @@ done
|
||||
|
||||
# Three or more consecutive empty lines
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done
|
||||
|
||||
# Check that every header file has #pragma once in first line
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done
|
||||
|
||||
# Too many exclamation marks
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)."
|
||||
|
||||
# Exclamation mark in a message
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)."
|
||||
|
||||
# Trailing whitespaces
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces."
|
||||
|
||||
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream"
|
||||
|
||||
# Forbid std::cerr/std::cout in src (fine in programs/utils)
|
||||
@ -204,6 +209,7 @@ std_cerr_cout_excludes=(
|
||||
_fuzzer
|
||||
# OK
|
||||
src/Common/ProgressIndication.cpp
|
||||
src/Common/ProgressTable.cpp
|
||||
# only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests
|
||||
src/Common/HashTable/HashTable.h
|
||||
# SensitiveDataMasker::printStats()
|
||||
@ -230,11 +236,10 @@ std_cerr_cout_excludes=(
|
||||
)
|
||||
sources_with_std_cerr_cout=( $(
|
||||
find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \
|
||||
grep -vP $EXCLUDE_DIRS | \
|
||||
grep -vP $EXCLUDE | \
|
||||
grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \
|
||||
xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u
|
||||
) )
|
||||
|
||||
# Exclude comments
|
||||
for src in "${sources_with_std_cerr_cout[@]}"; do
|
||||
# suppress stderr, since it may contain a warning for #pragma once in headers
|
||||
@ -279,23 +284,23 @@ fi
|
||||
|
||||
# Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead"
|
||||
|
||||
# Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead"
|
||||
|
||||
# Forbid mt19937() and random_device() which are outdated and slow
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead"
|
||||
|
||||
# Require checking return value of close(),
|
||||
# since it can hide fd misuse and break other places.
|
||||
find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked"
|
||||
|
||||
# A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647
|
||||
@ -322,18 +327,15 @@ ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or
|
||||
|
||||
# Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong.
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' &&
|
||||
echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong."
|
||||
|
||||
find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' |
|
||||
grep -vP $EXCLUDE_DIRS |
|
||||
grep -vP $EXCLUDE |
|
||||
xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' &&
|
||||
echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice."
|
||||
|
||||
# There shouldn't be any code snippets under GPL or LGPL
|
||||
find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL"
|
||||
|
||||
PATTERN="allow_";
|
||||
DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u));
|
||||
[ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt"
|
||||
|
ci_v2/jobs/scripts/functional_tests_results.py (new executable file, 284 lines)
@@ -0,0 +1,284 @@
|
||||
import dataclasses
|
||||
from typing import List
|
||||
|
||||
from praktika.environment import Environment
|
||||
from praktika.result import Result
|
||||
|
||||
OK_SIGN = "[ OK "
|
||||
FAIL_SIGN = "[ FAIL "
|
||||
TIMEOUT_SIGN = "[ Timeout! "
|
||||
UNKNOWN_SIGN = "[ UNKNOWN "
|
||||
SKIPPED_SIGN = "[ SKIPPED "
|
||||
HUNG_SIGN = "Found hung queries in processlist"
|
||||
SERVER_DIED_SIGN = "Server died, terminating all processes"
|
||||
SERVER_DIED_SIGN2 = "Server does not respond to health check"
|
||||
DATABASE_SIGN = "Database: "
|
||||
|
||||
SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"]
|
||||
|
||||
RETRIES_SIGN = "Some tests were restarted"
|
||||
|
||||
|
||||
# def write_results(results_file, status_file, results, status):
|
||||
# with open(results_file, "w", encoding="utf-8") as f:
|
||||
# out = csv.writer(f, delimiter="\t")
|
||||
# out.writerows(results)
|
||||
# with open(status_file, "w", encoding="utf-8") as f:
|
||||
# out = csv.writer(f, delimiter="\t")
|
||||
# out.writerow(status)
|
||||
|
||||
BROKEN_TESTS_ANALYZER_TECH_DEBT = [
|
||||
"01624_soft_constraints",
|
||||
# Check after ConstantNode refactoring
|
||||
"02944_variant_as_common_type",
|
||||
]
|
||||
|
||||
|
||||
class FTResultsProcessor:
|
||||
@dataclasses.dataclass
|
||||
class Summary:
|
||||
total: int
|
||||
skipped: int
|
||||
unknown: int
|
||||
failed: int
|
||||
success: int
|
||||
test_results: List[Result]
|
||||
hung: bool = False
|
||||
server_died: bool = False
|
||||
retries: bool = False
|
||||
success_finish: bool = False
|
||||
test_end: bool = True
|
||||
|
||||
def __init__(self, wd):
|
||||
self.tests_output_file = f"{wd}/test_result.txt"
|
||||
# self.test_results_parsed_file = f"{wd}/test_result.tsv"
|
||||
# self.status_file = f"{wd}/check_status.tsv"
|
||||
self.broken_tests = BROKEN_TESTS_ANALYZER_TECH_DEBT
|
||||
|
||||
def _process_test_output(self):
|
||||
total = 0
|
||||
skipped = 0
|
||||
unknown = 0
|
||||
failed = 0
|
||||
success = 0
|
||||
hung = False
|
||||
server_died = False
|
||||
retries = False
|
||||
success_finish = False
|
||||
test_results = []
|
||||
test_end = True
|
||||
|
||||
with open(self.tests_output_file, "r", encoding="utf-8") as test_file:
|
||||
for line in test_file:
|
||||
original_line = line
|
||||
line = line.strip()
|
||||
|
||||
if any(s in line for s in SUCCESS_FINISH_SIGNS):
|
||||
success_finish = True
|
||||
# Ignore hung check report, since it may be quite large.
|
||||
# (and may break python parser which has limit of 128KiB for each row).
|
||||
if HUNG_SIGN in line:
|
||||
hung = True
|
||||
break
|
||||
if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line:
|
||||
server_died = True
|
||||
if RETRIES_SIGN in line:
|
||||
retries = True
|
||||
if any(
|
||||
sign in line
|
||||
for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)
|
||||
):
|
||||
test_name = line.split(" ")[2].split(":")[0]
|
||||
|
||||
test_time = ""
|
||||
try:
|
||||
time_token = line.split("]")[1].strip().split()[0]
|
||||
float(time_token)
|
||||
test_time = time_token
|
||||
except:
|
||||
pass
|
||||
|
||||
total += 1
|
||||
if TIMEOUT_SIGN in line:
|
||||
if test_name in self.broken_tests:
|
||||
success += 1
|
||||
test_results.append((test_name, "BROKEN", test_time, []))
|
||||
else:
|
||||
failed += 1
|
||||
test_results.append((test_name, "Timeout", test_time, []))
|
||||
elif FAIL_SIGN in line:
|
||||
if test_name in self.broken_tests:
|
||||
success += 1
|
||||
test_results.append((test_name, "BROKEN", test_time, []))
|
||||
else:
|
||||
failed += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
elif UNKNOWN_SIGN in line:
|
||||
unknown += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
elif SKIPPED_SIGN in line:
|
||||
skipped += 1
|
||||
test_results.append((test_name, "SKIPPED", test_time, []))
|
||||
else:
|
||||
if OK_SIGN in line and test_name in self.broken_tests:
|
||||
skipped += 1
|
||||
test_results.append(
|
||||
(
|
||||
test_name,
|
||||
"NOT_FAILED",
|
||||
test_time,
|
||||
[
|
||||
"This test passed. Update analyzer_tech_debt.txt.\n"
|
||||
],
|
||||
)
|
||||
)
|
||||
else:
|
||||
success += int(OK_SIGN in line)
|
||||
test_results.append((test_name, "OK", test_time, []))
|
||||
test_end = False
|
||||
elif (
|
||||
len(test_results) > 0
|
||||
and test_results[-1][1] == "FAIL"
|
||||
and not test_end
|
||||
):
|
||||
test_results[-1][3].append(original_line)
|
||||
# Database printed after everything else in case of failures,
|
||||
# so this is a stop marker for capturing test output.
|
||||
#
|
||||
# And it is handled after everything else to include line with database into the report.
|
||||
if DATABASE_SIGN in line:
|
||||
test_end = True
|
||||
|
||||
test_results = [
|
||||
Result(
|
||||
name=test[0],
|
||||
status=test[1],
|
||||
start_time=None,
|
||||
duration=float(test[2]),
|
||||
info="".join(test[3])[:8192],
|
||||
)
|
||||
for test in test_results
|
||||
]
|
||||
|
||||
s = self.Summary(
|
||||
total=total,
|
||||
skipped=skipped,
|
||||
unknown=unknown,
|
||||
failed=failed,
|
||||
success=success,
|
||||
test_results=test_results,
|
||||
hung=hung,
|
||||
server_died=server_died,
|
||||
success_finish=success_finish,
|
||||
retries=retries,
|
||||
)
|
||||
|
||||
return s
|
||||
|
||||
def run(self):
|
||||
state = Result.Status.SUCCESS
|
||||
s = self._process_test_output()
|
||||
test_results = s.test_results
|
||||
|
||||
# # Check test_results.tsv for sanitizer asserts, crashes and other critical errors.
|
||||
# # If the file is present, it's expected to be generated by stress_test.lib check for critical errors
|
||||
# # In the end this file will be fully regenerated, including both results from critical errors check and
|
||||
# # functional test results.
|
||||
# if test_results_path and os.path.exists(test_results_path):
|
||||
# with open(test_results_path, "r", encoding="utf-8") as test_results_file:
|
||||
# existing_test_results = list(
|
||||
# csv.reader(test_results_file, delimiter="\t")
|
||||
# )
|
||||
# for test in existing_test_results:
|
||||
# if len(test) < 2:
|
||||
# unknown += 1
|
||||
# else:
|
||||
# test_results.append(test)
|
||||
#
|
||||
# if test[1] != "OK":
|
||||
# failed += 1
|
||||
# else:
|
||||
# success += 1
|
||||
|
||||
# is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1))
|
||||
# logging.info("Is flaky check: %s", is_flaky_check)
|
||||
# # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
|
||||
# # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
|
||||
# if failed != 0 or unknown != 0 or (success == 0 and (not is_flaky_check)):
|
||||
if s.failed != 0 or s.unknown != 0:
|
||||
state = Result.Status.FAILED
|
||||
|
||||
if s.hung:
|
||||
state = Result.Status.FAILED
|
||||
test_results.append(
|
||||
Result("Some queries hung", "FAIL", info="Some queries hung")
|
||||
)
|
||||
elif s.server_died:
|
||||
state = Result.Status.FAILED
|
||||
# When ClickHouse server crashes, some tests are still running
|
||||
# and fail because they cannot connect to server
|
||||
for result in test_results:
|
||||
if result.status == "FAIL":
|
||||
result.status = "SERVER_DIED"
|
||||
test_results.append(Result("Server died", "FAIL", info="Server died"))
|
||||
elif not s.success_finish:
|
||||
state = Result.Status.FAILED
|
||||
test_results.append(
|
||||
Result("Tests are not finished", "FAIL", info="Tests are not finished")
|
||||
)
|
||||
elif s.retries:
|
||||
test_results.append(
|
||||
Result("Some tests restarted", "SKIPPED", info="Some tests restarted")
|
||||
)
|
||||
else:
|
||||
pass
|
||||
|
||||
# TODO: !!!
|
||||
# def test_result_comparator(item):
|
||||
# # sort by status then by check name
|
||||
# order = {
|
||||
# "FAIL": 0,
|
||||
# "SERVER_DIED": 1,
|
||||
# "Timeout": 2,
|
||||
# "NOT_FAILED": 3,
|
||||
# "BROKEN": 4,
|
||||
# "OK": 5,
|
||||
# "SKIPPED": 6,
|
||||
# }
|
||||
# return order.get(item[1], 10), str(item[0]), item[1]
|
||||
#
|
||||
# test_results.sort(key=test_result_comparator)
|
||||
|
||||
return Result.create_from(
|
||||
name=Environment.JOB_NAME,
|
||||
results=test_results,
|
||||
status=state,
|
||||
files=[self.tests_output_file],
|
||||
with_info_from_results=False,
|
||||
)
|
||||
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
|
||||
# parser = argparse.ArgumentParser(
|
||||
# description="ClickHouse script for parsing results of functional tests"
|
||||
# )
|
||||
#
|
||||
# parser.add_argument("--out-results-file", default="/test_output/test_results.tsv")
|
||||
# parser.add_argument("--out-status-file", default="/test_output/check_status.tsv")
|
||||
# args = parser.parse_args()
|
||||
#
|
||||
# broken_tests = []
|
||||
# state, description, test_results = process_result(
|
||||
# args.in_results_dir,
|
||||
# broken_tests,
|
||||
# args.in_test_result_file,
|
||||
# args.in_results_file,
|
||||
# )
|
||||
# logging.info("Result parsed")
|
||||
# status = (state, description)
|
||||
#
|
||||
#
|
||||
#
|
||||
# write_results(args.out_results_file, args.out_status_file, test_results, status)
|
||||
# logging.info("Result written")
|
@ -7,6 +7,7 @@ S3_BUCKET_HTTP_ENDPOINT = "clickhouse-builds.s3.amazonaws.com"
|
||||
class RunnerLabels:
|
||||
CI_SERVICES = "ci_services"
|
||||
CI_SERVICES_EBS = "ci_services_ebs"
|
||||
BUILDER = "builder"
|
||||
|
||||
|
||||
BASE_BRANCH = "master"
|
||||
@ -29,142 +30,122 @@ SECRETS = [
|
||||
DOCKERS = [
|
||||
# Docker.Config(
|
||||
# name="clickhouse/binary-builder",
|
||||
# path="./docker/packager/binary-builder",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/packager/binary-builder",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/cctools",
|
||||
# path="./docker/packager/cctools",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/packager/cctools",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/test-old-centos",
|
||||
# path="./docker/test/compatibility/centos",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/compatibility/centos",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/test-old-ubuntu",
|
||||
# path="./docker/test/compatibility/ubuntu",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/compatibility/ubuntu",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/test-util",
|
||||
# path="./docker/test/util",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/util",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/integration-test",
|
||||
# path="./docker/test/integration/base",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/integration/base",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/fuzzer",
|
||||
# path="./docker/test/fuzzer",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/fuzzer",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/performance-comparison",
|
||||
# path="./docker/test/performance-comparison",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/performance-comparison",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=[],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/fasttest",
|
||||
# path="./docker/test/fasttest",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# depends_on=["clickhouse/test-util"],
|
||||
# ),
|
||||
Docker.Config(
|
||||
name="clickhouse/fasttest",
|
||||
path="./ci_v2/docker/fasttest",
|
||||
platforms=Docker.Platforms.arm_amd,
|
||||
depends_on=[],
|
||||
),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/test-base",
|
||||
# path="./docker/test/base",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/base",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-util"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/clickbench",
|
||||
# path="./docker/test/clickbench",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/clickbench",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/keeper-jepsen-test",
|
||||
# path="./docker/test/keeper-jepsen",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/keeper-jepsen",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/server-jepsen-test",
|
||||
# path="./docker/test/server-jepsen",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/server-jepsen",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/sqllogic-test",
|
||||
# path="./docker/test/sqllogic",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/sqllogic",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/sqltest",
|
||||
# path="./docker/test/sqltest",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/sqltest",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/stateless-test",
|
||||
# path="./docker/test/stateless",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/stateless",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/stateful-test",
|
||||
# path="./docker/test/stateful",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/stateful",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/stateless-test"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/stress-test",
|
||||
# path="./docker/test/stress",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/stress",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/stateful-test"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/unit-test",
|
||||
# path="./docker/test/unit",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/unit",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/integration-tests-runner",
|
||||
# path="./docker/test/integration/runner",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/test/integration/runner",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
Docker.Config(
|
||||
@ -175,9 +156,8 @@ DOCKERS = [
|
||||
),
|
||||
# Docker.Config(
|
||||
# name="clickhouse/docs-builder",
|
||||
# path="./docker/docs/builder",
|
||||
# arm64=True,
|
||||
# amd64=True,
|
||||
# path="./ci_v2/docker/docs/builder",
|
||||
# platforms=Docker.Platforms.arm_amd,
|
||||
# depends_on=["clickhouse/test-base"],
|
||||
# ),
|
||||
]
|
||||
@ -249,3 +229,4 @@ DOCKERS = [
|
||||
|
||||
class JobNames:
|
||||
STYLE_CHECK = "Style Check"
|
||||
FAST_TEST = "Fast test"
|
||||
|
@@ -16,12 +16,20 @@ style_check_job = Job.Config(
     run_in_docker="clickhouse/style-test",
 )

+fast_test_job = Job.Config(
+    name=JobNames.FAST_TEST,
+    runs_on=[RunnerLabels.BUILDER],
+    command="python3 ./ci_v2/jobs/fast_test.py",
+    run_in_docker="clickhouse/fasttest",
+)
+
 workflow = Workflow.Config(
     name="PR",
     event=Workflow.Event.PULL_REQUEST,
     base_branches=[BASE_BRANCH],
     jobs=[
         style_check_job,
+        fast_test_job,
     ],
     dockers=DOCKERS,
     secrets=SECRETS,
@@ -36,9 +44,7 @@ WORKFLOWS = [


 if __name__ == "__main__":
-    # example: local job test inside praktika environment
+    # local job test inside praktika environment
     from praktika.runner import Runner

-    Runner.generate_dummy_environment(workflow, style_check_job)
-
-    Runner().run(workflow, style_check_job)
+    Runner().run(workflow, fast_test_job, docker="fasttest", dummy_env=True)
@ -16,6 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
libxml2-utils \
|
||||
locales \
|
||||
moreutils \
|
||||
ripgrep \
|
||||
python3-pip \
|
||||
yamllint \
|
||||
zstd \
|
||||
|
@ -36,6 +36,7 @@ SETTINGS
|
||||
## Settings {#settings}
|
||||
|
||||
The set of supported settings is the same as for the `S3Queue` table engine, but without the `s3queue_` prefix. See the [full list of settings](../../../engines/table-engines/integrations/s3queue.md#settings).
|
||||
To get a list of settings configured for the table, use the `system.s3_queue_settings` table, available from server version `24.10`.
|
||||
|
||||
## Description {#description}
|
||||
|
||||
|
@ -69,6 +69,8 @@ SETTINGS
|
||||
|
||||
## Settings {#settings}
|
||||
|
||||
To get a list of settings configured for the table, use the `system.s3_queue_settings` table, available from server version `24.10`.
|
||||
|
||||
### mode {#mode}
|
||||
|
||||
Possible values:
|
||||
|
@ -33,7 +33,7 @@ The tags or attributes are saved as two parallel arrays, containing the keys and
|
||||
|
||||
## Log-query-settings
|
||||
|
||||
ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance.
|
||||
The setting [log_query_settings](settings/settings.md) allows logging changes to query settings during query execution. When enabled, any modifications made to query settings are recorded in the OpenTelemetry span log. This is particularly useful in production environments for tracking configuration changes that may affect query performance.
|
||||
|
||||
## Integration with monitoring systems
|
||||
|
||||
|
@ -1488,6 +1488,8 @@ Keys:
|
||||
- `formatting` – Log format for console output. Currently, only `json` is supported.
|
||||
- `use_syslog` - Also forward log output to syslog.
|
||||
- `syslog_level` - Log level for logging to syslog.
|
||||
- `message_regexp` - Only log messages that match this regular expression. Defaults to `""`, indicating no filtering.
|
||||
- `message_regexp_negative` - Only log messages that don't match this regular expression. Defaults to `""`, indicating no filtering.
|
||||
|
||||
**Log format specifiers**
|
||||
|
||||
@ -1576,6 +1578,28 @@ The log level of individual log names can be overridden. For example, to mute al
|
||||
</logger>
|
||||
```
|
||||
|
||||
**Regular Expression Filtering**
|
||||
|
||||
Logged messages can be filtered with regular expressions using `message_regexp` and `message_regexp_negative`, either globally or per logger. If both a global and a logger-specific pattern are specified, the global pattern is overridden (ignored) and only the logger-specific pattern applies. The positive and negative patterns are considered independently in this situation. Note: using this feature may cause a slight performance slowdown.
|
||||
|
||||
|
||||
```xml
|
||||
<logger>
|
||||
<level>trace</level>
|
||||
<!-- Global: Don't log Trace messages -->
|
||||
<message_regexp_negative>.*Trace.*</message_regexp_negative>
|
||||
|
||||
<message_regexps>
|
||||
<logger>
|
||||
<!-- For the executeQuery logger, only log if message has "Read", but not "from" -->
|
||||
<name>executeQuery</name>
|
||||
<message_regexp>.*Read.*</message_regexp>
|
||||
<message_regexp_negative>.*from.*</message_regexp_negative>
|
||||
</logger>
|
||||
</message_regexps>
|
||||
</logger>
|
||||
```
|
||||
|
||||
### syslog
|
||||
|
||||
To write log messages additionally to syslog:
|
||||
|
@ -1079,6 +1079,8 @@ Possible values:
|
||||
|
||||
Default value: 0 bytes.
|
||||
|
||||
Note that if both `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` are specified, ClickHouse uses the value that requires the larger amount of free disk space, i.e. whichever of the two settings is more restrictive.
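To make the interaction of the two settings concrete, here is a minimal Python sketch with invented numbers; the helper name is hypothetical and only illustrates the "more restrictive value wins" reading of the note above:

```python
# Illustrative only: effective free-disk threshold when both settings are set,
# assuming the more restrictive (larger) requirement is the one that gates INSERTs.
def effective_min_free_bytes(total_disk_bytes: int,
                             min_free_disk_bytes: int,
                             min_free_disk_ratio: float) -> int:
    ratio_bytes = int(total_disk_bytes * min_free_disk_ratio)  # ratio requirement in bytes
    return max(min_free_disk_bytes, ratio_bytes)

# Example: 100 GiB disk, 1 GiB absolute minimum, 0.02 ratio minimum.
total = 100 * 1024**3
print(effective_min_free_bytes(total, 1 * 1024**3, 0.02))  # 2147483648 -> the 2 GiB ratio requirement wins
```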
|
||||
|
||||
## min_free_disk_ratio_to_perform_insert
|
||||
|
||||
The minimum ratio of free to total disk space required to perform an `INSERT`. Must be a floating-point value between 0 and 1. Note that this setting:
|
||||
|
20
docs/en/operations/system-tables/azure_queue_settings.md
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/azure_queue_settings
|
||||
---
|
||||
# azure_queue_settings
|
||||
|
||||
Contains information about settings of [AzureQueue](../../engines/table-engines/integrations/azure-queue.md) tables.
|
||||
Available since server version `24.10`.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
|
||||
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
|
||||
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
|
||||
- `alterable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting can be changed via `ALTER TABLE ... MODIFY SETTING`.
|
||||
- `0` — Current user can alter the setting.
|
||||
- `1` — Current user can’t alter the setting.
|
||||
- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
|
@ -13,10 +13,12 @@ The `system.part_log` table contains the following columns:
|
||||
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
|
||||
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
|
||||
- `NewPart` — Inserting of a new data part.
|
||||
- `MergeParts` — Merging of data parts.
|
||||
- `MergePartsStart` — Merging of data parts has started.
|
||||
- `MergeParts` — Merging of data parts has finished.
|
||||
- `DownloadPart` — Downloading a data part.
|
||||
- `RemovePart` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
|
||||
- `MutatePart` — Mutating of a data part.
|
||||
- `MutatePartStart` — Mutating of a data part has started.
|
||||
- `MutatePart` — Mutating of a data part has finished.
|
||||
- `MovePart` — Moving the data part from one disk to another.
|
||||
- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values:
|
||||
- `NotAMerge` — The current event has the type other than `MERGE_PARTS`.
|
||||
|
20
docs/en/operations/system-tables/s3_queue_settings.md
Normal file
@ -0,0 +1,20 @@
|
||||
---
|
||||
slug: /en/operations/system-tables/s3_queue_settings
|
||||
---
|
||||
# s3_queue_settings
|
||||
|
||||
Contains information about settings of [S3Queue](../../engines/table-engines/integrations/s3queue.md) tables.
|
||||
Available since server version `24.10`.
|
||||
|
||||
Columns:
|
||||
|
||||
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
|
||||
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
|
||||
- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name.
|
||||
- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value.
|
||||
- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
|
||||
- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description.
|
||||
- `alterable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting can be changed via `ALTER TABLE ... MODIFY SETTING`.
|
||||
- `0` — Current user can alter the setting.
|
||||
- `1` — Current user can’t alter the setting.
|
||||
- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
|
5
praktika/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
from .artifact import Artifact
|
||||
from .docker import Docker
|
||||
from .job import Job
|
||||
from .secret import Secret
|
||||
from .workflow import Workflow
|
94
praktika/__main__.py
Normal file
@ -0,0 +1,94 @@
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from praktika.html_prepare import Html
|
||||
from praktika.utils import Utils
|
||||
from praktika.validator import Validator
|
||||
from praktika.yaml_generator import YamlGenerator
|
||||
|
||||
|
||||
def create_parser():
|
||||
parser = argparse.ArgumentParser(prog="python3 -m praktika")
|
||||
|
||||
subparsers = parser.add_subparsers(dest="command", help="Available subcommands")
|
||||
|
||||
run_parser = subparsers.add_parser("run", help="Job Runner")
|
||||
run_parser.add_argument("--job", help="Job Name", type=str, required=True)
|
||||
run_parser.add_argument(
|
||||
"--workflow",
|
||||
help="Workflow Name (required if job name is not uniq per config)",
|
||||
type=str,
|
||||
default="",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--no-docker",
|
||||
help="Do not run job in docker even if job config says so, for local test",
|
||||
action="store_true",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--docker",
|
||||
help="Custom docker image for job run, for local test",
|
||||
type=str,
|
||||
default="",
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--param",
|
||||
help="Custom parameter to pass into a job script, it's up to job script how to use it, for local test",
|
||||
type=str,
|
||||
default=None,
|
||||
)
|
||||
run_parser.add_argument(
|
||||
"--ci",
|
||||
help="When not set - dummy env will be generated, for local test",
|
||||
action="store_true",
|
||||
default="",
|
||||
)
|
||||
|
||||
_yaml_parser = subparsers.add_parser("yaml", help="Generates Yaml Workflows")
|
||||
|
||||
_html_parser = subparsers.add_parser("html", help="Uploads HTML page for reports")
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = create_parser()
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "yaml":
|
||||
Validator().validate()
|
||||
YamlGenerator().generate()
|
||||
elif args.command == "html":
|
||||
Html.prepare()
|
||||
elif args.command == "run":
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.runner import Runner
|
||||
|
||||
workflows = _get_workflows(name=args.workflow or None)
|
||||
job_workflow_pairs = []
|
||||
for workflow in workflows:
|
||||
job = workflow.find_job(args.job, lazy=True)
|
||||
if job:
|
||||
job_workflow_pairs.append((job, workflow))
|
||||
if not job_workflow_pairs:
|
||||
Utils.raise_with_error(
|
||||
f"Failed to find job [{args.job}] workflow [{args.workflow}]"
|
||||
)
|
||||
elif len(job_workflow_pairs) > 1:
|
||||
Utils.raise_with_error(
|
||||
f"More than one job [{args.job}] found - try specifying workflow name with --workflow"
|
||||
)
|
||||
else:
|
||||
job, workflow = job_workflow_pairs[0][0], job_workflow_pairs[0][1]
|
||||
print(f"Going to run job [{job.name}], workflow [{workflow.name}]")
|
||||
Runner().run(
|
||||
workflow=workflow,
|
||||
job=job,
|
||||
docker=args.docker,
|
||||
dummy_env=not args.ci,
|
||||
no_docker=args.no_docker,
|
||||
param=args.param,
|
||||
)
|
||||
else:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
195
praktika/_environment.py
Normal file
@ -0,0 +1,195 @@
|
||||
import dataclasses
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Type
|
||||
|
||||
from praktika import Workflow
|
||||
from praktika._settings import _Settings
|
||||
from praktika.utils import MetaClasses, T
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class _Environment(MetaClasses.Serializable):
|
||||
WORKFLOW_NAME: str
|
||||
JOB_NAME: str
|
||||
REPOSITORY: str
|
||||
BRANCH: str
|
||||
SHA: str
|
||||
PR_NUMBER: int
|
||||
EVENT_TYPE: str
|
||||
JOB_OUTPUT_STREAM: str
|
||||
EVENT_FILE_PATH: str
|
||||
CHANGE_URL: str
|
||||
COMMIT_URL: str
|
||||
BASE_BRANCH: str
|
||||
RUN_ID: str
|
||||
RUN_URL: str
|
||||
INSTANCE_TYPE: str
|
||||
INSTANCE_ID: str
|
||||
INSTANCE_LIFE_CYCLE: str
|
||||
PARAMETER: Any = None
|
||||
REPORT_INFO: List[str] = dataclasses.field(default_factory=list)
|
||||
LOCAL_RUN_PARAM: str = ""
|
||||
name = "environment"
|
||||
|
||||
@classmethod
|
||||
def file_name_static(cls, _name=""):
|
||||
return f"{_Settings.TEMP_DIR}/{cls.name}.json"
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T:
|
||||
JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "")
|
||||
obj["JOB_OUTPUT_STREAM"] = JOB_OUTPUT_STREAM
|
||||
if "PARAMETER" in obj:
|
||||
obj["PARAMETER"] = _to_object(obj["PARAMETER"])
|
||||
return cls(**obj)
|
||||
|
||||
def add_info(self, info):
|
||||
self.REPORT_INFO.append(info)
|
||||
self.dump()
|
||||
|
||||
@classmethod
|
||||
def get(cls):
|
||||
if Path(cls.file_name_static()).is_file():
|
||||
return cls.from_fs("environment")
|
||||
else:
|
||||
print("WARNING: Environment: get from env")
|
||||
env = cls.from_env()
|
||||
env.dump()
|
||||
return env
|
||||
|
||||
def set_job_name(self, job_name):
|
||||
self.JOB_NAME = job_name
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
@staticmethod
|
||||
def get_needs_statuses():
|
||||
if Path(_Settings.WORKFLOW_STATUS_FILE).is_file():
|
||||
with open(_Settings.WORKFLOW_STATUS_FILE, "r", encoding="utf8") as f:
|
||||
return json.load(f)
|
||||
else:
|
||||
print(
|
||||
f"ERROR: Status file [{_Settings.WORKFLOW_STATUS_FILE}] does not exist"
|
||||
)
|
||||
raise RuntimeError()
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "_Environment":
|
||||
WORKFLOW_NAME = os.getenv("GITHUB_WORKFLOW", "")
|
||||
JOB_NAME = os.getenv("JOB_NAME", "")
|
||||
REPOSITORY = os.getenv("GITHUB_REPOSITORY", "")
|
||||
BRANCH = os.getenv("GITHUB_HEAD_REF", "")
|
||||
|
||||
EVENT_FILE_PATH = os.getenv("GITHUB_EVENT_PATH", "")
|
||||
JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "")
|
||||
RUN_ID = os.getenv("GITHUB_RUN_ID", "0")
|
||||
RUN_URL = f"https://github.com/{REPOSITORY}/actions/runs/{RUN_ID}"
|
||||
BASE_BRANCH = os.getenv("GITHUB_BASE_REF", "")
|
||||
|
||||
if EVENT_FILE_PATH:
|
||||
with open(EVENT_FILE_PATH, "r", encoding="utf-8") as f:
|
||||
github_event = json.load(f)
|
||||
if "pull_request" in github_event:
|
||||
EVENT_TYPE = Workflow.Event.PULL_REQUEST
|
||||
PR_NUMBER = github_event["pull_request"]["number"]
|
||||
SHA = github_event["pull_request"]["head"]["sha"]
|
||||
CHANGE_URL = github_event["pull_request"]["html_url"]
|
||||
COMMIT_URL = CHANGE_URL + f"/commits/{SHA}"
|
||||
elif "commits" in github_event:
|
||||
EVENT_TYPE = Workflow.Event.PUSH
|
||||
SHA = github_event["after"]
|
||||
CHANGE_URL = github_event["head_commit"]["url"] # commit url
|
||||
PR_NUMBER = 0
|
||||
COMMIT_URL = CHANGE_URL
|
||||
else:
|
||||
assert False, "TODO: not supported"
|
||||
else:
|
||||
print("WARNING: Local execution - dummy Environment will be generated")
|
||||
SHA = "TEST"
|
||||
PR_NUMBER = -1
|
||||
EVENT_TYPE = Workflow.Event.PUSH
|
||||
CHANGE_URL = ""
|
||||
COMMIT_URL = ""
|
||||
|
||||
INSTANCE_TYPE = (
|
||||
os.getenv("INSTANCE_TYPE", None)
|
||||
# or Shell.get_output("ec2metadata --instance-type")
|
||||
or ""
|
||||
)
|
||||
INSTANCE_ID = (
|
||||
os.getenv("INSTANCE_ID", None)
|
||||
# or Shell.get_output("ec2metadata --instance-id")
|
||||
or ""
|
||||
)
|
||||
INSTANCE_LIFE_CYCLE = (
|
||||
os.getenv("INSTANCE_LIFE_CYCLE", None)
|
||||
# or Shell.get_output(
|
||||
# "curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle"
|
||||
# )
|
||||
or ""
|
||||
)
|
||||
|
||||
return _Environment(
|
||||
WORKFLOW_NAME=WORKFLOW_NAME,
|
||||
JOB_NAME=JOB_NAME,
|
||||
REPOSITORY=REPOSITORY,
|
||||
BRANCH=BRANCH,
|
||||
EVENT_FILE_PATH=EVENT_FILE_PATH,
|
||||
JOB_OUTPUT_STREAM=JOB_OUTPUT_STREAM,
|
||||
SHA=SHA,
|
||||
EVENT_TYPE=EVENT_TYPE,
|
||||
PR_NUMBER=PR_NUMBER,
|
||||
RUN_ID=RUN_ID,
|
||||
CHANGE_URL=CHANGE_URL,
|
||||
COMMIT_URL=COMMIT_URL,
|
||||
RUN_URL=RUN_URL,
|
||||
BASE_BRANCH=BASE_BRANCH,
|
||||
INSTANCE_TYPE=INSTANCE_TYPE,
|
||||
INSTANCE_ID=INSTANCE_ID,
|
||||
INSTANCE_LIFE_CYCLE=INSTANCE_LIFE_CYCLE,
|
||||
REPORT_INFO=[],
|
||||
)
|
||||
|
||||
def get_s3_prefix(self, latest=False):
|
||||
return self.get_s3_prefix_static(self.PR_NUMBER, self.BRANCH, self.SHA, latest)
|
||||
|
||||
@classmethod
|
||||
def get_s3_prefix_static(cls, pr_number, branch, sha, latest=False):
|
||||
prefix = ""
|
||||
if pr_number > 0:
|
||||
prefix += f"{pr_number}"
|
||||
else:
|
||||
prefix += f"{branch}"
|
||||
if latest:
|
||||
prefix += f"/latest"
|
||||
elif sha:
|
||||
prefix += f"/{sha}"
|
||||
return prefix
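For illustration, a few invented inputs and the prefixes `get_s3_prefix_static` would return for them:

```python
# PR builds are keyed by PR number, push builds by branch; "latest" replaces the sha.
print(_Environment.get_s3_prefix_static(123, "master", "abc123"))               # "123/abc123"
print(_Environment.get_s3_prefix_static(123, "master", "abc123", latest=True))  # "123/latest"
print(_Environment.get_s3_prefix_static(0, "master", "abc123"))                 # "master/abc123"
```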
|
||||
|
||||
# TODO: find a better place for the function. This file should not import praktika.settings
|
||||
# as it requires reading the user's config; that's why the imports are nested inside the function
|
||||
def get_report_url(self):
|
||||
import urllib
|
||||
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
path = Settings.HTML_S3_PATH
|
||||
for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items():
|
||||
if bucket in path:
|
||||
path = path.replace(bucket, endpoint)
|
||||
break
|
||||
REPORT_URL = f"https://{path}/{Path(Settings.HTML_PAGE_FILE).name}?PR={self.PR_NUMBER}&sha={self.SHA}&name_0={urllib.parse.quote(self.WORKFLOW_NAME, safe='')}&name_1={urllib.parse.quote(self.JOB_NAME, safe='')}"
|
||||
return REPORT_URL
|
||||
|
||||
|
||||
def _to_object(data):
|
||||
if isinstance(data, dict):
|
||||
return SimpleNamespace(**{k: _to_object(v) for k, v in data.items()})
|
||||
elif isinstance(data, list):
|
||||
return [_to_object(i) for i in data]
|
||||
else:
|
||||
return data
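A small usage sketch (the payload is made up) of how `_to_object` turns a JSON-decoded `PARAMETER` dict into attribute-style access:

```python
# Hypothetical PARAMETER payload as it could arrive from environment.json.
param = _to_object({"build": {"type": "release", "sanitizers": ["asan", "ubsan"]}})

print(param.build.type)           # "release"
print(param.build.sanitizers[1])  # "ubsan" - lists are converted element-wise
```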
|
128
praktika/_settings.py
Normal file
@ -0,0 +1,128 @@
|
||||
import dataclasses
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class _Settings:
|
||||
######################################
|
||||
# Pipeline generation settings #
|
||||
######################################
|
||||
if Path("./ci_v2").is_dir():
|
||||
# TODO: hack for CH, remove
|
||||
CI_PATH = "./ci_v2"
|
||||
else:
|
||||
CI_PATH = "./ci"
|
||||
WORKFLOW_PATH_PREFIX: str = "./.github/workflows"
|
||||
WORKFLOWS_DIRECTORY: str = f"{CI_PATH}/workflows"
|
||||
SETTINGS_DIRECTORY: str = f"{CI_PATH}/settings"
|
||||
CI_CONFIG_JOB_NAME = "Config Workflow"
|
||||
DOCKER_BUILD_JOB_NAME = "Docker Builds"
|
||||
FINISH_WORKFLOW_JOB_NAME = "Finish Workflow"
|
||||
READY_FOR_MERGE_STATUS_NAME = "Ready for Merge"
|
||||
CI_CONFIG_RUNS_ON: Optional[List[str]] = None
|
||||
DOCKER_BUILD_RUNS_ON: Optional[List[str]] = None
|
||||
VALIDATE_FILE_PATHS: bool = True
|
||||
|
||||
######################################
|
||||
# Runtime Settings #
|
||||
######################################
|
||||
MAX_RETRIES_S3 = 3
|
||||
MAX_RETRIES_GH = 3
|
||||
|
||||
######################################
|
||||
# S3 (artifact storage) settings #
|
||||
######################################
|
||||
S3_ARTIFACT_PATH: str = ""
|
||||
|
||||
######################################
|
||||
# CI workspace settings #
|
||||
######################################
|
||||
TEMP_DIR: str = "/tmp/praktika"
|
||||
OUTPUT_DIR: str = f"{TEMP_DIR}/output"
|
||||
INPUT_DIR: str = f"{TEMP_DIR}/input"
|
||||
PYTHON_INTERPRETER: str = "python3"
|
||||
PYTHON_PACKET_MANAGER: str = "pip3"
|
||||
PYTHON_VERSION: str = "3.9"
|
||||
INSTALL_PYTHON_FOR_NATIVE_JOBS: bool = False
|
||||
INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS: str = "./ci/requirements.txt"
|
||||
ENVIRONMENT_VAR_FILE: str = f"{TEMP_DIR}/environment.json"
|
||||
RUN_LOG: str = f"{TEMP_DIR}/praktika_run.log"
|
||||
|
||||
SECRET_GH_APP_ID: str = "GH_APP_ID"
|
||||
SECRET_GH_APP_PEM_KEY: str = "GH_APP_PEM_KEY"
|
||||
|
||||
ENV_SETUP_SCRIPT: str = "/tmp/praktika_setup_env.sh"
|
||||
WORKFLOW_STATUS_FILE: str = f"{TEMP_DIR}/workflow_status.json"
|
||||
|
||||
######################################
|
||||
# CI Cache settings #
|
||||
######################################
|
||||
CACHE_VERSION: int = 1
|
||||
CACHE_DIGEST_LEN: int = 20
|
||||
CACHE_S3_PATH: str = ""
|
||||
CACHE_LOCAL_PATH: str = f"{TEMP_DIR}/ci_cache"
|
||||
|
||||
######################################
|
||||
# Report settings #
|
||||
######################################
|
||||
HTML_S3_PATH: str = ""
|
||||
HTML_PAGE_FILE: str = "./praktika/json.html"
|
||||
TEXT_CONTENT_EXTENSIONS: Iterable[str] = frozenset([".txt", ".log"])
|
||||
S3_BUCKET_TO_HTTP_ENDPOINT: Optional[Dict[str, str]] = None
|
||||
|
||||
DOCKERHUB_USERNAME: str = ""
|
||||
DOCKERHUB_SECRET: str = ""
|
||||
DOCKER_WD: str = "/wd"
|
||||
|
||||
######################################
|
||||
# CI DB Settings #
|
||||
######################################
|
||||
SECRET_CI_DB_URL: str = "CI_DB_URL"
|
||||
SECRET_CI_DB_PASSWORD: str = "CI_DB_PASSWORD"
|
||||
CI_DB_DB_NAME = ""
|
||||
CI_DB_TABLE_NAME = ""
|
||||
CI_DB_INSERT_TIMEOUT_SEC = 5
|
||||
|
||||
|
||||
_USER_DEFINED_SETTINGS = [
|
||||
"S3_ARTIFACT_PATH",
|
||||
"CACHE_S3_PATH",
|
||||
"HTML_S3_PATH",
|
||||
"S3_BUCKET_TO_HTTP_ENDPOINT",
|
||||
"TEXT_CONTENT_EXTENSIONS",
|
||||
"TEMP_DIR",
|
||||
"OUTPUT_DIR",
|
||||
"INPUT_DIR",
|
||||
"CI_CONFIG_RUNS_ON",
|
||||
"DOCKER_BUILD_RUNS_ON",
|
||||
"CI_CONFIG_JOB_NAME",
|
||||
"PYTHON_INTERPRETER",
|
||||
"PYTHON_VERSION",
|
||||
"PYTHON_PACKET_MANAGER",
|
||||
"INSTALL_PYTHON_FOR_NATIVE_JOBS",
|
||||
"INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS",
|
||||
"MAX_RETRIES_S3",
|
||||
"MAX_RETRIES_GH",
|
||||
"VALIDATE_FILE_PATHS",
|
||||
"DOCKERHUB_USERNAME",
|
||||
"DOCKERHUB_SECRET",
|
||||
"READY_FOR_MERGE_STATUS_NAME",
|
||||
"SECRET_CI_DB_URL",
|
||||
"SECRET_CI_DB_PASSWORD",
|
||||
"CI_DB_DB_NAME",
|
||||
"CI_DB_TABLE_NAME",
|
||||
"CI_DB_INSERT_TIMEOUT_SEC",
|
||||
"SECRET_GH_APP_PEM_KEY",
|
||||
"SECRET_GH_APP_ID",
|
||||
]
|
||||
|
||||
|
||||
class GHRunners:
|
||||
ubuntu = "ubuntu-latest"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
for setting in _USER_DEFINED_SETTINGS:
|
||||
print(_Settings().__getattribute__(setting))
|
||||
# print(dataclasses.asdict(_Settings()))
|
33
praktika/artifact.py
Normal file
@ -0,0 +1,33 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
class Artifact:
|
||||
class Type:
|
||||
GH = "github"
|
||||
S3 = "s3"
|
||||
PHONY = "phony"
|
||||
|
||||
@dataclass
|
||||
class Config:
|
||||
"""
|
||||
name - artifact name
|
||||
type - artifact type, see Artifact.Type
|
||||
path - file path or glob, e.g. "path/**/[abc]rtifac?/*"
|
||||
"""
|
||||
|
||||
name: str
|
||||
type: str
|
||||
path: str
|
||||
_provided_by: str = ""
|
||||
_s3_path: str = ""
|
||||
|
||||
def is_s3_artifact(self):
|
||||
return self.type == Artifact.Type.S3
|
||||
|
||||
@classmethod
|
||||
def define_artifact(cls, name, type, path):
|
||||
return cls.Config(name=name, type=type, path=path)
|
||||
|
||||
@classmethod
|
||||
def define_gh_artifact(cls, name, path):
|
||||
return cls.define_artifact(name=name, type=cls.Type.GH, path=path)
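A minimal usage sketch; the artifact names and paths below are invented and only show how the helpers fit together:

```python
# Hypothetical artifact declarations for a workflow config.
build_binary = Artifact.Config(name="build_binary", type=Artifact.Type.S3, path="build/clickhouse")
test_logs = Artifact.define_gh_artifact(name="test_logs", path="output/**/*.log")

assert build_binary.is_s3_artifact()
assert not test_logs.is_s3_artifact()  # GH artifacts are uploaded via GitHub, not S3
```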
|
127
praktika/cache.py
Normal file
@ -0,0 +1,127 @@
|
||||
import dataclasses
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from praktika import Artifact, Job, Workflow
|
||||
from praktika._environment import _Environment
|
||||
from praktika.digest import Digest
|
||||
from praktika.s3 import S3
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
|
||||
class Cache:
|
||||
@dataclasses.dataclass
|
||||
class CacheRecord:
|
||||
class Type:
|
||||
SUCCESS = "success"
|
||||
|
||||
type: str
|
||||
sha: str
|
||||
pr_number: int
|
||||
branch: str
|
||||
|
||||
def dump(self, path):
|
||||
with open(path, "w", encoding="utf8") as f:
|
||||
json.dump(dataclasses.asdict(self), f)
|
||||
|
||||
@classmethod
|
||||
def from_fs(cls, path):
|
||||
with open(path, "r", encoding="utf8") as f:
|
||||
return Cache.CacheRecord(**json.load(f))
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, obj):
|
||||
return Cache.CacheRecord(**obj)
|
||||
|
||||
def __init__(self):
|
||||
self.digest = Digest()
|
||||
self.success = {} # type Dict[str, Any]
|
||||
|
||||
@classmethod
|
||||
def push_success_record(cls, job_name, job_digest, sha):
|
||||
type_ = Cache.CacheRecord.Type.SUCCESS
|
||||
record = Cache.CacheRecord(
|
||||
type=type_,
|
||||
sha=sha,
|
||||
pr_number=_Environment.get().PR_NUMBER,
|
||||
branch=_Environment.get().BRANCH,
|
||||
)
|
||||
assert (
|
||||
Settings.CACHE_S3_PATH
|
||||
), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache"
|
||||
record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}"
|
||||
record_file = Path(Settings.TEMP_DIR) / type_
|
||||
record.dump(record_file)
|
||||
S3.copy_file_to_s3(s3_path=record_path, local_path=record_file)
|
||||
record_file.unlink()
|
||||
|
||||
def fetch_success(self, job_name, job_digest):
|
||||
type_ = Cache.CacheRecord.Type.SUCCESS
|
||||
assert (
|
||||
Settings.CACHE_S3_PATH
|
||||
), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache"
|
||||
record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}/{type_}"
|
||||
record_file_local_dir = (
|
||||
f"{Settings.CACHE_LOCAL_PATH}/{Utils.normalize_string(job_name)}/"
|
||||
)
|
||||
Path(record_file_local_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if S3.head_object(record_path):
|
||||
res = S3.copy_file_from_s3(
|
||||
s3_path=record_path, local_path=record_file_local_dir
|
||||
)
|
||||
else:
|
||||
res = None
|
||||
|
||||
if res:
|
||||
print(f"Cache record found, job [{job_name}], digest [{job_digest}]")
|
||||
self.success[job_name] = True
|
||||
return Cache.CacheRecord.from_fs(Path(record_file_local_dir) / type_)
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test
|
||||
c = Cache()
|
||||
workflow = Workflow.Config(
|
||||
name="TEST",
|
||||
event=Workflow.Event.PULL_REQUEST,
|
||||
jobs=[
|
||||
Job.Config(
|
||||
name="JobA",
|
||||
runs_on=["some"],
|
||||
command="python -m unittest ./ci/tests/example_1/test_example_produce_artifact.py",
|
||||
provides=["greet"],
|
||||
job_requirements=Job.Requirements(
|
||||
python_requirements_txt="./ci/requirements.txt"
|
||||
),
|
||||
digest_config=Job.CacheDigestConfig(
|
||||
# example: use glob to include files
|
||||
include_paths=["./ci/tests/example_1/test_example_consume*.py"],
|
||||
),
|
||||
),
|
||||
Job.Config(
|
||||
name="JobB",
|
||||
runs_on=["some"],
|
||||
command="python -m unittest ./ci/tests/example_1/test_example_consume_artifact.py",
|
||||
requires=["greet"],
|
||||
job_requirements=Job.Requirements(
|
||||
python_requirements_txt="./ci/requirements.txt"
|
||||
),
|
||||
digest_config=Job.CacheDigestConfig(
|
||||
# example: use dir to include files recursively
|
||||
include_paths=["./ci/tests/example_1"],
|
||||
# example: use glob to exclude files from digest
|
||||
exclude_paths=[
|
||||
"./ci/tests/example_1/test_example_consume*",
|
||||
"./**/*.pyc",
|
||||
],
|
||||
),
|
||||
),
|
||||
],
|
||||
artifacts=[Artifact.Config(type="s3", name="greet", path="hello")],
|
||||
enable_cache=True,
|
||||
)
|
||||
for job in workflow.jobs:
|
||||
print(c.digest.calc_job_digest(job))
|
136
praktika/cidb.py
Normal file
@ -0,0 +1,136 @@
|
||||
import copy
|
||||
import dataclasses
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from praktika._environment import _Environment
|
||||
from praktika.result import Result
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
|
||||
class CIDB:
|
||||
@dataclasses.dataclass
|
||||
class TableRecord:
|
||||
pull_request_number: int
|
||||
commit_sha: str
|
||||
commit_url: str
|
||||
check_name: str
|
||||
check_status: str
|
||||
check_duration_ms: int
|
||||
check_start_time: int
|
||||
report_url: str
|
||||
pull_request_url: str
|
||||
base_ref: str
|
||||
base_repo: str
|
||||
head_ref: str
|
||||
head_repo: str
|
||||
task_url: str
|
||||
instance_type: str
|
||||
instance_id: str
|
||||
test_name: str
|
||||
test_status: str
|
||||
test_duration_ms: Optional[int]
|
||||
test_context_raw: str
|
||||
|
||||
def __init__(self, url, passwd):
|
||||
self.url = url
|
||||
self.auth = {
|
||||
"X-ClickHouse-User": "default",
|
||||
"X-ClickHouse-Key": passwd,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def json_data_generator(cls, result: Result):
|
||||
env = _Environment.get()
|
||||
base_record = cls.TableRecord(
|
||||
pull_request_number=env.PR_NUMBER,
|
||||
commit_sha=env.SHA,
|
||||
commit_url=env.COMMIT_URL,
|
||||
check_name=result.name,
|
||||
check_status=result.status,
|
||||
check_duration_ms=int(result.duration * 1000),
|
||||
check_start_time=Utils.timestamp_to_str(result.start_time),
|
||||
report_url=env.get_report_url(),
|
||||
pull_request_url=env.CHANGE_URL,
|
||||
base_ref=env.BASE_BRANCH,
|
||||
base_repo=env.REPOSITORY,
|
||||
head_ref=env.BRANCH,
|
||||
# TODO: remove from table?
|
||||
head_repo=env.REPOSITORY,
|
||||
# TODO: remove from table?
|
||||
task_url="",
|
||||
instance_type=",".join([env.INSTANCE_TYPE, env.INSTANCE_LIFE_CYCLE]),
|
||||
instance_id=env.INSTANCE_ID,
|
||||
test_name="",
|
||||
test_status="",
|
||||
test_duration_ms=None,
|
||||
test_context_raw=result.info,
|
||||
)
|
||||
yield json.dumps(dataclasses.asdict(base_record))
|
||||
for result_ in result.results:
|
||||
record = copy.deepcopy(base_record)
|
||||
record.test_name = result_.name
|
||||
if result_.start_time:
|
||||
record.check_start_time = Utils.timestamp_to_str(result_.start_time)
|
||||
record.test_status = result_.status
|
||||
record.test_duration_ms = int(result_.duration * 1000)
|
||||
record.test_context_raw = result_.info
|
||||
yield json.dumps(dataclasses.asdict(record))
|
||||
|
||||
def insert(self, result: Result):
|
||||
# Create a session object
|
||||
params = {
|
||||
"database": Settings.CI_DB_DB_NAME,
|
||||
"query": f"INSERT INTO {Settings.CI_DB_TABLE_NAME} FORMAT JSONEachRow",
|
||||
"date_time_input_format": "best_effort",
|
||||
"send_logs_level": "warning",
|
||||
}
|
||||
|
||||
session = requests.Session()
|
||||
|
||||
for json_str in self.json_data_generator(result):
|
||||
try:
|
||||
response1 = session.post(
|
||||
url=self.url,
|
||||
params=params,
|
||||
data=json_str,
|
||||
headers=self.auth,
|
||||
timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
|
||||
)
|
||||
except Exception as ex:
|
||||
raise ex
|
||||
|
||||
session.close()
|
||||
|
||||
def check(self):
|
||||
# Create a session object
|
||||
params = {
|
||||
"database": Settings.CI_DB_DB_NAME,
|
||||
"query": f"SELECT 1",
|
||||
}
|
||||
try:
|
||||
response = requests.post(
|
||||
url=self.url,
|
||||
params=params,
|
||||
data="",
|
||||
headers=self.auth,
|
||||
timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
|
||||
)
|
||||
if not response.ok:
|
||||
print("ERROR: No connection to CI DB")
|
||||
return (
|
||||
False,
|
||||
f"ERROR: No connection to CI DB [{response.status_code}/{response.reason}]",
|
||||
)
|
||||
if not response.json() == 1:
|
||||
print("ERROR: CI DB smoke test failed select 1 == 1")
|
||||
return (
|
||||
False,
|
||||
f"ERROR: CI DB smoke test failed [select 1 ==> {response.json()}]",
|
||||
)
|
||||
except Exception as ex:
|
||||
print(f"ERROR: Exception [{ex}]")
|
||||
return False, "CIDB: ERROR: Exception [{ex}]"
|
||||
return True, ""
|
100
praktika/digest.py
Normal file
@ -0,0 +1,100 @@
|
||||
import dataclasses
|
||||
import hashlib
|
||||
from hashlib import md5
|
||||
from typing import List
|
||||
|
||||
from praktika import Job
|
||||
from praktika.docker import Docker
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
|
||||
class Digest:
|
||||
def __init__(self):
|
||||
self.digest_cache = {}
|
||||
|
||||
@staticmethod
|
||||
def _hash_digest_config(digest_config: Job.CacheDigestConfig) -> str:
|
||||
data_dict = dataclasses.asdict(digest_config)
|
||||
hash_obj = md5()
|
||||
hash_obj.update(str(data_dict).encode())
|
||||
hash_string = hash_obj.hexdigest()
|
||||
return hash_string
|
||||
|
||||
def calc_job_digest(self, job_config: Job.Config):
|
||||
config = job_config.digest_config
|
||||
if not config:
|
||||
return "f" * Settings.CACHE_DIGEST_LEN
|
||||
|
||||
cache_key = self._hash_digest_config(config)
|
||||
|
||||
if cache_key in self.digest_cache:
|
||||
return self.digest_cache[cache_key]
|
||||
|
||||
included_files = Utils.traverse_paths(
|
||||
job_config.digest_config.include_paths,
|
||||
job_config.digest_config.exclude_paths,
|
||||
sorted=True,
|
||||
)
|
||||
|
||||
print(f"calc digest: hash_key [{cache_key}], include [{included_files}] files")
|
||||
# Sort files to ensure consistent hash calculation
|
||||
included_files.sort()
|
||||
|
||||
# Calculate MD5 hash
|
||||
res = ""
|
||||
if not included_files:
|
||||
res = "f" * Settings.CACHE_DIGEST_LEN
|
||||
print(f"NOTE: empty digest config [{config}] - return dummy digest")
|
||||
else:
|
||||
hash_md5 = hashlib.md5()
|
||||
for file_path in included_files:
|
||||
res = self._calc_file_digest(file_path, hash_md5)
|
||||
assert res
|
||||
self.digest_cache[cache_key] = res
|
||||
return res
|
||||
|
||||
def calc_docker_digest(
|
||||
self,
|
||||
docker_config: Docker.Config,
|
||||
dependency_configs: List[Docker.Config],
|
||||
hash_md5=None,
|
||||
):
|
||||
"""
|
||||
|
||||
:param hash_md5:
|
||||
:param dependency_configs: list of Docker.Config(s) that :param docker_config: depends on
|
||||
:param docker_config: Docker.Config to calculate digest for
|
||||
:return:
|
||||
"""
|
||||
print(f"Calculate digest for docker [{docker_config.name}]")
|
||||
paths = Utils.traverse_path(docker_config.path, sorted=True)
|
||||
if not hash_md5:
|
||||
hash_md5 = hashlib.md5()
|
||||
|
||||
dependencies = []
|
||||
for dependency_name in docker_config.depends_on:
|
||||
for dependency_config in dependency_configs:
|
||||
if dependency_config.name == dependency_name:
|
||||
print(
|
||||
f"Add docker [{dependency_config.name}] as dependency for docker [{docker_config.name}] digest calculation"
|
||||
)
|
||||
dependencies.append(dependency_config)
|
||||
|
||||
for dependency in dependencies:
|
||||
_ = self.calc_docker_digest(dependency, dependency_configs, hash_md5)
|
||||
|
||||
for path in paths:
|
||||
_ = self._calc_file_digest(path, hash_md5=hash_md5)
|
||||
|
||||
return hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
|
||||
|
||||
@staticmethod
|
||||
def _calc_file_digest(file_path, hash_md5):
|
||||
# Calculate MD5 hash
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
hash_md5.update(chunk)
|
||||
|
||||
res = hash_md5.hexdigest()[: Settings.CACHE_DIGEST_LEN]
|
||||
return res
|
60
praktika/docker.py
Normal file
@ -0,0 +1,60 @@
|
||||
import dataclasses
|
||||
from typing import List
|
||||
|
||||
from praktika.utils import Shell
|
||||
|
||||
|
||||
class Docker:
|
||||
class Platforms:
|
||||
ARM = "linux/arm64"
|
||||
AMD = "linux/amd64"
|
||||
arm_amd = [ARM, AMD]
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Config:
|
||||
name: str
|
||||
path: str
|
||||
depends_on: List[str]
|
||||
platforms: List[str]
|
||||
|
||||
@classmethod
|
||||
def build(cls, config: "Docker.Config", log_file, digests, add_latest):
|
||||
tags_substr = f" -t {config.name}:{digests[config.name]}"
|
||||
if add_latest:
|
||||
tags_substr = f" -t {config.name}:latest"
|
||||
|
||||
from_tag = ""
|
||||
if config.depends_on:
|
||||
assert (
|
||||
len(config.depends_on) == 1
|
||||
), f"Only one dependency in depends_on is currently supported, docker [{config}]"
|
||||
from_tag = f" --build-arg FROM_TAG={digests[config.depends_on[0]]}"
|
||||
|
||||
command = f"docker buildx build --platform {','.join(config.platforms)} {tags_substr} {from_tag} --cache-to type=inline --cache-from type=registry,ref={config.name} --push {config.path}"
|
||||
return Shell.run(command, log_file=log_file, verbose=True)
|
||||
|
||||
@classmethod
|
||||
def sort_in_build_order(cls, dockers: List["Docker.Config"]):
|
||||
ready_names = []
|
||||
i = 0
|
||||
while i < len(dockers):
|
||||
docker = dockers[i]
|
||||
if not docker.depends_on or all(
|
||||
dep in ready_names for dep in docker.depends_on
|
||||
):
|
||||
ready_names.append(docker.name)
|
||||
i += 1
|
||||
else:
|
||||
dockers.append(dockers.pop(i))
|
||||
return dockers
|
||||
|
||||
@classmethod
|
||||
def login(cls, user_name, user_password):
|
||||
print("Docker: log in to dockerhub")
|
||||
return Shell.check(
|
||||
f"docker login --username '{user_name}' --password-stdin",
|
||||
strict=True,
|
||||
stdin_str=user_password,
|
||||
encoding="utf-8",
|
||||
verbose=True,
|
||||
)
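A short sketch (image names are invented) of how `sort_in_build_order` arranges configs so that base images are built before images that depend on them:

```python
# Hypothetical configs: "test" builds FROM "base", so "base" must come first.
dockers = [
    Docker.Config(name="test", path="./docker/test", depends_on=["base"], platforms=Docker.Platforms.arm_amd),
    Docker.Config(name="base", path="./docker/base", depends_on=[], platforms=Docker.Platforms.arm_amd),
]
ordered = Docker.sort_in_build_order(dockers)
print([d.name for d in ordered])  # ["base", "test"]
```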
|
3
praktika/environment.py
Normal file
@ -0,0 +1,3 @@
|
||||
from praktika._environment import _Environment
|
||||
|
||||
Environment = _Environment.get()
|
0
praktika/execution/__init__.py
Normal file
4
praktika/execution/__main__.py
Normal file
@ -0,0 +1,4 @@
|
||||
from praktika.execution.machine_init import run
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
31
praktika/execution/execution_settings.py
Normal file
@ -0,0 +1,31 @@
|
||||
import os
|
||||
|
||||
from praktika.utils import MetaClasses
|
||||
|
||||
|
||||
class ScalingType(metaclass=MetaClasses.WithIter):
|
||||
DISABLED = "disabled"
|
||||
AUTOMATIC_SCALE_DOWN = "scale_down"
|
||||
AUTOMATIC_SCALE_UP_DOWN = "scale"
|
||||
|
||||
|
||||
class DefaultExecutionSettings:
|
||||
GH_ACTIONS_DIRECTORY: str = "/home/ubuntu/gh_actions"
|
||||
RUNNER_SCALING_TYPE: str = ScalingType.AUTOMATIC_SCALE_UP_DOWN
|
||||
MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC: int = 30
|
||||
|
||||
|
||||
class ExecutionSettings:
|
||||
GH_ACTIONS_DIRECTORY = os.getenv(
|
||||
"GH_ACTIONS_DIRECTORY", DefaultExecutionSettings.GH_ACTIONS_DIRECTORY
|
||||
)
|
||||
RUNNER_SCALING_TYPE = os.getenv(
|
||||
"RUNNER_SCALING_TYPE", DefaultExecutionSettings.RUNNER_SCALING_TYPE
|
||||
)
|
||||
MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC = int(
|
||||
os.getenv(
|
||||
"MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC",
|
||||
DefaultExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC,
|
||||
)
|
||||
)
|
||||
LOCAL_EXECUTION = bool(os.getenv("CLOUD", "0") == "0")
|
338
praktika/execution/machine_init.py
Normal file
@ -0,0 +1,338 @@
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import requests
|
||||
from praktika.execution.execution_settings import ExecutionSettings, ScalingType
|
||||
from praktika.utils import ContextManager, Shell
|
||||
|
||||
|
||||
class StateMachine:
|
||||
class StateNames:
|
||||
INIT = "init"
|
||||
WAIT = "wait"
|
||||
RUN = "run"
|
||||
|
||||
def __init__(self):
|
||||
self.state = self.StateNames.INIT
|
||||
self.scale_type = ExecutionSettings.RUNNER_SCALING_TYPE
|
||||
self.machine = Machine(scaling_type=self.scale_type).update_instance_info()
|
||||
self.state_updated_at = int(time.time())
|
||||
self.forked = False
|
||||
|
||||
def kick(self):
|
||||
if self.state == self.StateNames.INIT:
|
||||
self.machine.config_actions().run_actions_async()
|
||||
print("State Machine: INIT -> WAIT")
|
||||
self.state = self.StateNames.WAIT
|
||||
self.state_updated_at = int(time.time())
|
||||
# TODO: add monitoring
|
||||
if not self.machine.is_actions_process_healthy():
|
||||
print(f"ERROR: GH runner process unexpectedly died")
|
||||
self.machine.self_terminate(decrease_capacity=False)
|
||||
elif self.state == self.StateNames.WAIT:
|
||||
res = self.machine.check_job_assigned()
|
||||
if res:
|
||||
print("State Machine: WAIT -> RUN")
|
||||
self.state = self.StateNames.RUN
|
||||
self.state_updated_at = int(time.time())
|
||||
self.check_scale_up()
|
||||
else:
|
||||
self.check_scale_down()
|
||||
elif self.state == self.StateNames.RUN:
|
||||
res = self.machine.check_job_running()
|
||||
if res:
|
||||
pass
|
||||
else:
|
||||
print("State Machine: RUN -> INIT")
|
||||
self.state = self.StateNames.INIT
|
||||
self.state_updated_at = int(time.time())
|
||||
|
||||
def check_scale_down(self):
|
||||
if self.scale_type not in (
|
||||
ScalingType.AUTOMATIC_SCALE_DOWN,
|
||||
ScalingType.AUTOMATIC_SCALE_UP_DOWN,
|
||||
):
|
||||
return
|
||||
if self.scale_type == ScalingType.AUTOMATIC_SCALE_UP_DOWN and not self.forked:
|
||||
print(
|
||||
f"Scaling type is AUTOMATIC_SCALE_UP_DOWN and machine has not run a job - do not scale down"
|
||||
)
|
||||
return
|
||||
if (
|
||||
int(time.time()) - self.state_updated_at
|
||||
> ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC
|
||||
):
|
||||
print(
|
||||
f"No job assigned for more than MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC [{ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC}] - scale down the instance"
|
||||
)
|
||||
if not ExecutionSettings.LOCAL_EXECUTION:
|
||||
self.machine.self_terminate(decrease_capacity=True)
|
||||
else:
|
||||
print("Local execution - skip scaling operation")
|
||||
|
||||
def check_scale_up(self):
|
||||
if self.scale_type not in (ScalingType.AUTOMATIC_SCALE_UP_DOWN,):
|
||||
return
|
||||
if self.forked:
|
||||
print("This instance already forked once - do not scale up")
|
||||
return
|
||||
self.machine.self_fork()
|
||||
self.forked = True
|
||||
|
||||
def run(self):
|
||||
self.machine.unconfig_actions()
|
||||
while True:
|
||||
self.kick()
|
||||
time.sleep(5)
|
||||
|
||||
def terminate(self):
|
||||
try:
|
||||
self.machine.unconfig_actions()
|
||||
except:
|
||||
print("WARNING: failed to unconfig runner")
|
||||
if not ExecutionSettings.LOCAL_EXECUTION:
|
||||
if self.machine is not None:
|
||||
self.machine.self_terminate(decrease_capacity=False)
|
||||
time.sleep(10)
|
||||
# wait termination
|
||||
print("ERROR: failed to terminate instance via aws cli - try os call")
|
||||
os.system("sudo shutdown now")
|
||||
else:
|
||||
print("NOTE: Local execution - machine won't be terminated")
|
||||
|
||||
|
||||
class Machine:
|
||||
@staticmethod
|
||||
def get_latest_gh_actions_release():
|
||||
url = f"https://api.github.com/repos/actions/runner/releases/latest"
|
||||
response = requests.get(url, timeout=5)
|
||||
if response.status_code == 200:
|
||||
latest_release = response.json()
|
||||
return latest_release["tag_name"].removeprefix("v")
|
||||
else:
|
||||
print(f"Failed to get the latest release: {response.status_code}")
|
||||
return None
|
||||
|
||||
def __init__(self, scaling_type):
|
||||
self.os_name = platform.system().lower()
|
||||
assert self.os_name == "linux", f"Unsupported OS [{self.os_name}]"
|
||||
if platform.machine() == "x86_64":
|
||||
self.arch = "x64"
|
||||
elif "aarch64" in platform.machine().lower():
|
||||
self.arch = "arm64"
|
||||
else:
|
||||
assert False, f"Unsupported arch [{platform.machine()}]"
|
||||
self.instance_id = None
|
||||
self.asg_name = None
|
||||
self.runner_api_endpoint = None
|
||||
self.runner_type = None
|
||||
self.labels = []
|
||||
self.proc = None
|
||||
assert scaling_type in ScalingType
|
||||
self.scaling_type = scaling_type
|
||||
|
||||
def install_gh_actions_runner(self):
|
||||
gh_actions_version = self.get_latest_gh_actions_release()
|
||||
assert self.os_name and gh_actions_version and self.arch
|
||||
Shell.check(
|
||||
f"rm -rf {ExecutionSettings.GH_ACTIONS_DIRECTORY}",
|
||||
strict=True,
|
||||
verbose=True,
|
||||
)
|
||||
Shell.check(
|
||||
f"mkdir {ExecutionSettings.GH_ACTIONS_DIRECTORY}", strict=True, verbose=True
|
||||
)
|
||||
with ContextManager.cd(ExecutionSettings.GH_ACTIONS_DIRECTORY):
|
||||
Shell.check(
|
||||
f"curl -O -L https://github.com/actions/runner/releases/download/v{gh_actions_version}/actions-runner-{self.os_name}-{self.arch}-{gh_actions_version}.tar.gz",
|
||||
strict=True,
|
||||
verbose=True,
|
||||
)
|
||||
Shell.check(f"tar xzf *tar.gz", strict=True, verbose=True)
|
||||
Shell.check(f"rm -f *tar.gz", strict=True, verbose=True)
|
||||
Shell.check(f"sudo ./bin/installdependencies.sh", strict=True, verbose=True)
|
||||
Shell.check(
|
||||
f"chown -R ubuntu:ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}",
|
||||
strict=True,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
def _get_gh_token_from_ssm(self):
|
||||
gh_token = Shell.get_output_or_raise(
|
||||
"/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value"
|
||||
)
|
||||
return gh_token
|
||||
|
||||
def update_instance_info(self):
|
||||
self.instance_id = Shell.get_output_or_raise("ec2metadata --instance-id")
|
||||
assert self.instance_id
|
||||
self.asg_name = Shell.get_output(
|
||||
f"aws ec2 describe-instances --instance-id {self.instance_id} --query \"Reservations[].Instances[].Tags[?Key=='aws:autoscaling:groupName'].Value\" --output text"
|
||||
)
|
||||
# self.runner_type = Shell.get_output_or_raise(
|
||||
# f'/usr/local/bin/aws ec2 describe-tags --filters "Name=resource-id,Values={self.instance_id}" --query "Tags[?Key==\'github:runner-type\'].Value" --output text'
|
||||
# )
|
||||
self.runner_type = self.asg_name
|
||||
if (
|
||||
self.scaling_type != ScalingType.DISABLED
|
||||
and not ExecutionSettings.LOCAL_EXECUTION
|
||||
):
|
||||
assert (
|
||||
self.asg_name and self.runner_type
|
||||
), f"Failed to retrieve ASG name, which is required for scaling_type [{self.scaling_type}]"
|
||||
org = os.getenv("MY_ORG", "")
|
||||
assert (
|
||||
org
|
||||
), "MY_ORG env variable myst be set to use init script for runner machine"
|
||||
self.runner_api_endpoint = f"https://github.com/{org}"
|
||||
|
||||
self.labels = ["self-hosted", self.runner_type]
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def check_job_assigned(cls):
|
||||
runner_pid = Shell.get_output_or_raise("pgrep Runner.Listener")
|
||||
if not runner_pid:
|
||||
print("check_job_assigned: No runner pid")
|
||||
return False
|
||||
log_file = Shell.get_output_or_raise(
|
||||
f"lsof -p {runner_pid} | grep -o {ExecutionSettings.GH_ACTIONS_DIRECTORY}/_diag/Runner.*log"
|
||||
)
|
||||
if not log_file:
|
||||
print("check_job_assigned: No log file")
|
||||
return False
|
||||
return Shell.check(f"grep -q 'Terminal] .* Running job:' {log_file}")
|
||||
|
||||
def check_job_running(self):
|
||||
if self.proc is None:
|
||||
print(f"WARNING: No job started")
|
||||
return False
|
||||
exit_code = self.proc.poll()
|
||||
if exit_code is None:
|
||||
return True
|
||||
else:
|
||||
print(f"Job runner finished with exit code [{exit_code}]")
|
||||
self.proc = None
|
||||
return False
|
||||
|
||||
def config_actions(self):
|
||||
if not self.instance_id:
|
||||
self.update_instance_info()
|
||||
token = self._get_gh_token_from_ssm()
|
||||
assert token and self.instance_id and self.runner_api_endpoint and self.labels
|
||||
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh --token {token} \
|
||||
--url {self.runner_api_endpoint} --ephemeral --unattended --replace \
|
||||
--runnergroup Default --labels {','.join(self.labels)} --work wd --name {self.instance_id}"
|
||||
res = 1
|
||||
i = 0
|
||||
while i < 10 and res != 0:
|
||||
res = Shell.run(command)
|
||||
i += 1
|
||||
if res != 0:
|
||||
print(
|
||||
f"ERROR: failed to configure GH actions runner after [{i}] attempts, exit code [{res}], retry after 10s"
|
||||
)
|
||||
time.sleep(10)
|
||||
self._get_gh_token_from_ssm()
|
||||
if res == 0:
|
||||
print("GH action runner has been configured")
|
||||
else:
|
||||
assert False, "GH actions runner configuration failed"
|
||||
return self
|
||||
|
||||
def unconfig_actions(self):
|
||||
token = self._get_gh_token_from_ssm()
|
||||
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh remove --token {token}"
|
||||
Shell.check(command, strict=True)
|
||||
return self
|
||||
|
||||
def run_actions_async(self):
|
||||
command = f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/run.sh"
|
||||
self.proc = Shell.run_async(command)
|
||||
assert self.proc is not None
|
||||
return self
|
||||
|
||||
def is_actions_process_healthy(self):
|
||||
try:
|
||||
if self.proc.poll() is None:
|
||||
return True
|
||||
|
||||
stdout, stderr = self.proc.communicate()
|
||||
|
||||
if self.proc.returncode != 0:
|
||||
# Handle failure
|
||||
print(
|
||||
f"GH Action process failed with return code {self.proc.returncode}"
|
||||
)
|
||||
print(f"Error output: {stderr}")
|
||||
return False
|
||||
else:
|
||||
print(f"GH Action process is not running")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"GH Action process exception: {e}")
|
||||
return False
|
||||
|
||||
def self_terminate(self, decrease_capacity):
|
||||
print(
|
||||
f"WARNING: Self terminate is called, decrease_capacity [{decrease_capacity}]"
|
||||
)
|
||||
traceback.print_stack()
|
||||
if not self.instance_id:
|
||||
self.update_instance_info()
|
||||
assert self.instance_id
|
||||
command = f"aws autoscaling terminate-instance-in-auto-scaling-group --instance-id {self.instance_id}"
|
||||
if decrease_capacity:
|
||||
command += " --should-decrement-desired-capacity"
|
||||
else:
|
||||
command += " --no-should-decrement-desired-capacity"
|
||||
Shell.check(
|
||||
command=command,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
def self_fork(self):
|
||||
current_capacity = Shell.get_output(
|
||||
f'aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name {self.asg_name} \
|
||||
--query "AutoScalingGroups[0].DesiredCapacity" --output text'
|
||||
)
|
||||
current_capacity = int(current_capacity)
|
||||
if not current_capacity:
|
||||
print("ERROR: failed to get current capacity - cannot scale up")
|
||||
return
|
||||
desired_capacity = current_capacity + 1
|
||||
command = f"aws autoscaling set-desired-capacity --auto-scaling-group-name {self.asg_name} --desired-capacity {desired_capacity}"
|
||||
print(f"Increase capacity [{current_capacity} -> {desired_capacity}]")
|
||||
res = Shell.check(
|
||||
command=command,
|
||||
verbose=True,
|
||||
)
|
||||
if not res:
|
||||
print("ERROR: failed to increase capacity - cannot scale up")
|
||||
|
||||
|
||||
def handle_signal(signum, _frame):
|
||||
print(f"FATAL: Received signal {signum}")
|
||||
raise RuntimeError(f"killed by signal {signum}")
|
||||
|
||||
|
||||
def run():
|
||||
signal.signal(signal.SIGINT, handle_signal)
|
||||
signal.signal(signal.SIGTERM, handle_signal)
|
||||
m = None
|
||||
try:
|
||||
m = StateMachine()
|
||||
m.run()
|
||||
except Exception as e:
|
||||
print(f"FATAL: Exception [{e}] - terminate instance")
|
||||
time.sleep(10)
|
||||
if m:
|
||||
m.terminate()
|
||||
raise e
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
102
praktika/favicon/lambda_function.py
Normal file
@ -0,0 +1,102 @@
|
||||
import base64
|
||||
import random
|
||||
import struct
|
||||
import zlib
|
||||
|
||||
|
||||
def create_favicon():
|
||||
# Image dimensions
|
||||
width = 32
|
||||
height = 32
|
||||
|
||||
# Initialize a transparent background image (RGBA: 4 bytes per pixel)
|
||||
image_data = bytearray(
|
||||
[0, 0, 0, 0] * width * height
|
||||
) # Set alpha to 0 for transparency
|
||||
|
||||
# Draw 4 vertical lines with color #FAFF68 (RGB: 250, 255, 104)
|
||||
line_color = [250, 255, 104, 255] # RGBA for #FAFF68 with full opacity
|
||||
line_width = 4
|
||||
space_width = 3
|
||||
x_start = space_width
|
||||
line_number = 4
|
||||
|
||||
line_height = height - space_width
|
||||
|
||||
for i in range(line_number):
|
||||
# Randomly pick a starting y position for each line
|
||||
y_start = random.randint(0, height - 1)
|
||||
|
||||
# Draw the line with random shift along Y-axis
|
||||
for y in range(line_height):
|
||||
y_pos = (y + y_start) % height
|
||||
for x in range(line_width):
|
||||
pixel_index = (y_pos * width + x_start + x) * 4
|
||||
image_data[pixel_index : pixel_index + 4] = line_color
|
||||
|
||||
x_start += line_width + space_width
|
||||
|
||||
# Convert the RGBA image to PNG format
|
||||
png_data = create_png(width, height, image_data)
|
||||
|
||||
# Convert PNG to ICO format
|
||||
ico_data = create_ico(png_data)
|
||||
|
||||
return ico_data
|
||||
|
||||
|
||||
def create_png(width, height, image_data):
|
||||
def write_chunk(chunk_type, data):
|
||||
chunk_len = struct.pack(">I", len(data))
|
||||
chunk_crc = struct.pack(">I", zlib.crc32(chunk_type + data) & 0xFFFFFFFF)
|
||||
return chunk_len + chunk_type + data + chunk_crc
|
||||
|
||||
png_signature = b"\x89PNG\r\n\x1a\n"
|
||||
ihdr_chunk = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0)
|
||||
idat_data = zlib.compress(
|
||||
b"".join(
|
||||
b"\x00" + image_data[y * width * 4 : (y + 1) * width * 4]
|
||||
for y in range(height)
|
||||
),
|
||||
9,
|
||||
)
|
||||
idat_chunk = write_chunk(b"IDAT", idat_data)
|
||||
iend_chunk = write_chunk(b"IEND", b"")
|
||||
|
||||
return png_signature + write_chunk(b"IHDR", ihdr_chunk) + idat_chunk + iend_chunk
|
||||
|
||||
|
||||
def create_ico(png_data):
|
||||
# ICO header: reserved (2 bytes), type (2 bytes), image count (2 bytes)
|
||||
ico_header = struct.pack("<HHH", 0, 1, 1)
|
||||
# ICO entry: width, height, color count, reserved, color planes, bits per pixel, size, offset
|
||||
ico_entry = struct.pack("<BBBBHHII", 32, 32, 0, 0, 1, 32, len(png_data), 22)
|
||||
return ico_header + ico_entry + png_data
|
||||
|
||||
|
||||
def save_favicon_to_disk(ico_data, file_path="favicon.ico"):
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(ico_data)
|
||||
print(f"Favicon saved to {file_path}")
|
||||
|
||||
|
||||
def lambda_handler(event, context):
|
||||
# Generate the favicon
|
||||
favicon_data = create_favicon()
|
||||
|
||||
# Return the favicon as a binary response
|
||||
return {
|
||||
"statusCode": 200,
|
||||
"headers": {
|
||||
"Content-Type": "image/x-icon",
|
||||
"Content-Disposition": 'inline; filename="favicon.ico"',
|
||||
},
|
||||
"body": base64.b64encode(favicon_data).decode("utf-8"),
|
||||
"isBase64Encoded": True,
|
||||
}
|
||||
|
||||
|
||||
# Optional: Call the function directly to generate and save favicon locally (if running outside Lambda)
|
||||
if __name__ == "__main__":
|
||||
favicon_data = create_favicon()
|
||||
save_favicon_to_disk(favicon_data)
|
105
praktika/gh.py
Normal file
@ -0,0 +1,105 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from praktika._environment import _Environment
|
||||
from praktika.result import Result
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Shell
|
||||
|
||||
|
||||
class GH:
|
||||
@classmethod
|
||||
def do_command_with_retries(cls, command):
|
||||
res = False
|
||||
retry_count = 0
|
||||
out, err = "", ""
|
||||
|
||||
while retry_count < Settings.MAX_RETRIES_GH and not res:
|
||||
ret_code, out, err = Shell.get_res_stdout_stderr(command, verbose=True)
|
||||
res = ret_code == 0
|
||||
if not res and "Validation Failed" in err:
|
||||
print("ERROR: GH command validation error")
|
||||
break
|
||||
if not res and "Bad credentials" in err:
|
||||
print("ERROR: GH credentials/auth failure")
|
||||
break
|
||||
if not res:
|
||||
retry_count += 1
|
||||
time.sleep(5)
|
||||
|
||||
if not res:
|
||||
print(
|
||||
f"ERROR: Failed to execute gh command [{command}] out:[{out}] err:[{err}] after [{retry_count}] attempts"
|
||||
)
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def post_pr_comment(
|
||||
cls, comment_body, or_update_comment_with_substring, repo=None, pr=None
|
||||
):
|
||||
if not repo:
|
||||
repo = _Environment.get().REPOSITORY
|
||||
if not pr:
|
||||
pr = _Environment.get().PR_NUMBER
|
||||
if or_update_comment_with_substring:
|
||||
print(f"check comment [{comment_body}] created")
|
||||
cmd_check_created = f'gh api -H "Accept: application/vnd.github.v3+json" \
|
||||
"/repos/{repo}/issues/{pr}/comments" \
|
||||
--jq \'.[] | {{id: .id, body: .body}}\' | grep -F "{or_update_comment_with_substring}"'
|
||||
output = Shell.get_output(cmd_check_created)
|
||||
if output:
|
||||
comment_ids = []
|
||||
try:
|
||||
comment_ids = [
|
||||
json.loads(item.strip())["id"] for item in output.split("\n")
|
||||
]
|
||||
except Exception as ex:
|
||||
print(f"Failed to retrieve PR comments with [{ex}]")
|
||||
for id in comment_ids:
|
||||
cmd = f'gh api \
|
||||
-X PATCH \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
"/repos/{repo}/issues/comments/{id}" \
|
||||
-f body=\'{comment_body}\''
|
||||
print(f"Update existing comments [{id}]")
|
||||
return cls.do_command_with_retries(cmd)
|
||||
|
||||
cmd = f'gh pr comment {pr} --body "{comment_body}"'
|
||||
return cls.do_command_with_retries(cmd)
|
||||
|
||||
@classmethod
|
||||
def post_commit_status(cls, name, status, description, url):
|
||||
status = cls.convert_to_gh_status(status)
|
||||
command = (
|
||||
f"gh api -X POST -H 'Accept: application/vnd.github.v3+json' "
|
||||
f"/repos/{_Environment.get().REPOSITORY}/statuses/{_Environment.get().SHA} "
|
||||
f"-f state='{status}' -f target_url='{url}' "
|
||||
f"-f description='{description}' -f context='{name}'"
|
||||
)
|
||||
return cls.do_command_with_retries(command)
|
||||
|
||||
@classmethod
|
||||
def convert_to_gh_status(cls, status):
|
||||
if status in (
|
||||
Result.Status.PENDING,
|
||||
Result.Status.SUCCESS,
|
||||
Result.Status.FAILED,
|
||||
Result.Status.ERROR,
|
||||
):
|
||||
return status
|
||||
if status == Result.Status.RUNNING:
|
||||
return Result.Status.PENDING
|
||||
else:
|
||||
assert (
|
||||
False
|
||||
), f"Invalid status [{status}] to be set as GH commit status.state"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test
|
||||
GH.post_pr_comment(
|
||||
comment_body="foobar",
|
||||
or_update_comment_with_substring="CI",
|
||||
repo="ClickHouse/praktika",
|
||||
pr=15,
|
||||
)
|
71
praktika/gh_auth.py
Normal file
@@ -0,0 +1,71 @@
import sys
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
from jwt import JWT, jwk_from_pem
|
||||
from praktika import Workflow
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Shell
|
||||
|
||||
|
||||
class GHAuth:
|
||||
@staticmethod
|
||||
def _generate_jwt(client_id, pem):
|
||||
pem = str.encode(pem)
|
||||
signing_key = jwk_from_pem(pem)
|
||||
payload = {
|
||||
"iat": int(time.time()),
|
||||
"exp": int(time.time()) + 600,
|
||||
"iss": client_id,
|
||||
}
|
||||
# Create JWT
|
||||
jwt_instance = JWT()
|
||||
encoded_jwt = jwt_instance.encode(payload, signing_key, alg="RS256")
|
||||
return encoded_jwt
|
||||
|
||||
@staticmethod
|
||||
def _get_installation_id(jwt_token):
|
||||
headers = {
|
||||
"Authorization": f"Bearer {jwt_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
response = requests.get(
|
||||
"https://api.github.com/app/installations", headers=headers, timeout=10
|
||||
)
|
||||
response.raise_for_status()
|
||||
installations = response.json()
|
||||
assert installations, "No installations found for the GitHub App"
|
||||
return installations[0]["id"]
|
||||
|
||||
@staticmethod
|
||||
def _get_access_token(jwt_token, installation_id):
|
||||
headers = {
|
||||
"Authorization": f"Bearer {jwt_token}",
|
||||
"Accept": "application/vnd.github.v3+json",
|
||||
}
|
||||
url = (
|
||||
f"https://api.github.com/app/installations/{installation_id}/access_tokens"
|
||||
)
|
||||
response = requests.post(url, headers=headers, timeout=10)
|
||||
response.raise_for_status()
|
||||
return response.json()["token"]
|
||||
|
||||
@classmethod
|
||||
def auth(cls, workflow_name) -> None:
|
||||
wf = _get_workflows(workflow_name) # type: List[Workflow.Config]
|
||||
pem = wf[0].get_secret(Settings.SECRET_GH_APP_PEM_KEY).get_value()
|
||||
assert pem
|
||||
app_id = wf[0].get_secret(Settings.SECRET_GH_APP_ID).get_value()
|
||||
# Generate JWT
|
||||
jwt_token = cls._generate_jwt(app_id, pem)
|
||||
# Get Installation ID
|
||||
installation_id = cls._get_installation_id(jwt_token)
|
||||
# Get Installation Access Token
|
||||
access_token = cls._get_access_token(jwt_token, installation_id)
|
||||
Shell.check(f"echo {access_token} | gh auth login --with-token", strict=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
GHAuth.auth(sys.argv[1])
|
124
praktika/hook_cache.py
Normal file
@@ -0,0 +1,124 @@
from praktika._environment import _Environment
|
||||
from praktika.cache import Cache
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.runtime import RunConfig
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
|
||||
class CacheRunnerHooks:
|
||||
@classmethod
|
||||
def configure(cls, _workflow):
|
||||
workflow_config = RunConfig.from_fs(_workflow.name)
|
||||
cache = Cache()
|
||||
assert _Environment.get().WORKFLOW_NAME
|
||||
workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0]
|
||||
print(f"Workflow Configure, workflow [{workflow.name}]")
|
||||
assert (
|
||||
workflow.enable_cache
|
||||
), f"Outdated yaml pipelines or BUG. Configuration must be run only for workflow with enabled cache, workflow [{workflow.name}]"
|
||||
artifact_digest_map = {}
|
||||
job_digest_map = {}
|
||||
for job in workflow.jobs:
|
||||
if not job.digest_config:
|
||||
print(
|
||||
f"NOTE: job [{job.name}] has no Config.digest_config - skip cache check, always run"
|
||||
)
|
||||
digest = cache.digest.calc_job_digest(job_config=job)
|
||||
job_digest_map[job.name] = digest
|
||||
if job.provides:
|
||||
# assign the job digest also to the artifacts it provides
|
||||
for artifact in job.provides:
|
||||
artifact_digest_map[artifact] = digest
|
||||
for job in workflow.jobs:
|
||||
digests_combined_list = []
|
||||
if job.requires:
|
||||
# include digest of required artifact to the job digest, so that they affect job state
|
||||
for artifact_name in job.requires:
|
||||
if artifact_name not in [
|
||||
artifact.name for artifact in workflow.artifacts
|
||||
]:
|
||||
# phony artifact assumed to be not affecting jobs that depend on it
|
||||
continue
|
||||
digests_combined_list.append(artifact_digest_map[artifact_name])
|
||||
digests_combined_list.append(job_digest_map[job.name])
|
||||
final_digest = "-".join(digests_combined_list)
|
||||
workflow_config.digest_jobs[job.name] = final_digest
|
||||
|
||||
assert (
|
||||
workflow_config.digest_jobs
|
||||
), f"BUG, Workflow with enabled cache must have job digests after configuration, wf [{workflow.name}]"
|
||||
|
||||
print("Check remote cache")
|
||||
job_to_cache_record = {}
|
||||
for job_name, job_digest in workflow_config.digest_jobs.items():
|
||||
record = cache.fetch_success(job_name=job_name, job_digest=job_digest)
|
||||
if record:
|
||||
assert (
|
||||
Utils.normalize_string(job_name)
|
||||
not in workflow_config.cache_success
|
||||
)
|
||||
workflow_config.cache_success.append(job_name)
|
||||
workflow_config.cache_success_base64.append(Utils.to_base64(job_name))
|
||||
job_to_cache_record[job_name] = record
|
||||
|
||||
print("Check artifacts to reuse")
|
||||
for job in workflow.jobs:
|
||||
if job.name in workflow_config.cache_success:
|
||||
if job.provides:
|
||||
for artifact_name in job.provides:
|
||||
workflow_config.cache_artifacts[artifact_name] = (
|
||||
job_to_cache_record[job.name]
|
||||
)
|
||||
|
||||
print(f"Write config to GH's job output")
|
||||
with open(_Environment.get().JOB_OUTPUT_STREAM, "a", encoding="utf8") as f:
|
||||
print(
|
||||
f"DATA={workflow_config.to_json()}",
|
||||
file=f,
|
||||
)
|
||||
print(f"WorkflowRuntimeConfig: [{workflow_config.to_json(pretty=True)}]")
|
||||
print(
|
||||
"Dump WorkflowConfig to fs, the next hooks in this job might want to see it"
|
||||
)
|
||||
workflow_config.dump()
|
||||
|
||||
return workflow_config
|
||||
|
||||
@classmethod
|
||||
def pre_run(cls, _workflow, _job, _required_artifacts=None):
|
||||
path_prefixes = []
|
||||
if _job.name == Settings.CI_CONFIG_JOB_NAME:
|
||||
# SPECIAL handling
|
||||
return path_prefixes
|
||||
env = _Environment.get()
|
||||
runtime_config = RunConfig.from_fs(_workflow.name)
|
||||
required_artifacts = []
|
||||
if _required_artifacts:
|
||||
required_artifacts = _required_artifacts
|
||||
for artifact in required_artifacts:
|
||||
if artifact.name in runtime_config.cache_artifacts:
|
||||
record = runtime_config.cache_artifacts[artifact.name]
|
||||
print(f"Reuse artifact [{artifact.name}] from [{record}]")
|
||||
path_prefixes.append(
|
||||
env.get_s3_prefix_static(
|
||||
record.pr_number, record.branch, record.sha
|
||||
)
|
||||
)
|
||||
else:
|
||||
path_prefixes.append(env.get_s3_prefix())
|
||||
return path_prefixes
|
||||
|
||||
@classmethod
|
||||
def run(cls, workflow, job):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def post_run(cls, workflow, job):
|
||||
if job.name == Settings.CI_CONFIG_JOB_NAME:
|
||||
return
|
||||
if job.digest_config:
|
||||
# cache is enabled, and it's a job that supposed to be cached (has defined digest config)
|
||||
workflow_runtime = RunConfig.from_fs(workflow.name)
|
||||
job_digest = workflow_runtime.digest_jobs[job.name]
|
||||
Cache.push_success_record(job.name, job_digest, workflow_runtime.sha)
|
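A job's final cache key computed in `configure` above is just the digests of its required (non-phony) artifacts followed by the job's own digest, joined with `-`. A tiny illustration of that combination step with made-up digest values (the real values come from `cache.digest.calc_job_digest`):

job_digest_map = {"Build": "aaa111", "Test": "bbb222"}  # hypothetical digests
artifact_digest_map = {"binary": "aaa111"}  # an artifact inherits its producer's digest

def final_digest(job_name, requires=()):
    parts = [artifact_digest_map[a] for a in requires if a in artifact_digest_map]
    parts.append(job_digest_map[job_name])
    return "-".join(parts)

assert final_digest("Build") == "aaa111"
assert final_digest("Test", requires=["binary"]) == "aaa111-bbb222"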
153
praktika/hook_html.py
Normal file
@@ -0,0 +1,153 @@
import urllib.parse
|
||||
from pathlib import Path
|
||||
|
||||
from praktika._environment import _Environment
|
||||
from praktika.gh import GH
|
||||
from praktika.parser import WorkflowConfigParser
|
||||
from praktika.result import Result, ResultInfo
|
||||
from praktika.runtime import RunConfig
|
||||
from praktika.s3 import S3
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Utils
|
||||
|
||||
|
||||
class HtmlRunnerHooks:
|
||||
@classmethod
|
||||
def configure(cls, _workflow):
|
||||
# generate pending Results for all jobs in the workflow
|
||||
if _workflow.enable_cache:
|
||||
skip_jobs = RunConfig.from_fs(_workflow.name).cache_success
|
||||
else:
|
||||
skip_jobs = []
|
||||
|
||||
env = _Environment.get()
|
||||
results = []
|
||||
for job in _workflow.jobs:
|
||||
if job.name not in skip_jobs:
|
||||
result = Result.generate_pending(job.name)
|
||||
else:
|
||||
result = Result.generate_skipped(job.name)
|
||||
results.append(result)
|
||||
summary_result = Result.generate_pending(_workflow.name, results=results)
|
||||
summary_result.aux_links.append(env.CHANGE_URL)
|
||||
summary_result.aux_links.append(env.RUN_URL)
|
||||
summary_result.start_time = Utils.timestamp()
|
||||
page_url = "/".join(
|
||||
["https:/", Settings.HTML_S3_PATH, str(Path(Settings.HTML_PAGE_FILE).name)]
|
||||
)
|
||||
for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items():
|
||||
page_url = page_url.replace(bucket, endpoint)
|
||||
# TODO: add support for non-PRs (use branch?)
|
||||
page_url += f"?PR={env.PR_NUMBER}&sha=latest&name_0={urllib.parse.quote(env.WORKFLOW_NAME, safe='')}"
|
||||
summary_result.html_link = page_url
|
||||
|
||||
# clean the previous latest results in PR if any
|
||||
if env.PR_NUMBER:
|
||||
S3.clean_latest_result()
|
||||
S3.copy_result_to_s3(
|
||||
summary_result,
|
||||
unlock=False,
|
||||
)
|
||||
|
||||
print(f"CI Status page url [{page_url}]")
|
||||
|
||||
res1 = GH.post_commit_status(
|
||||
name=_workflow.name,
|
||||
status=Result.Status.PENDING,
|
||||
description="",
|
||||
url=page_url,
|
||||
)
|
||||
res2 = GH.post_pr_comment(
|
||||
comment_body=f"Workflow [[{_workflow.name}]({page_url})], commit [{_Environment.get().SHA[:8]}]",
|
||||
or_update_comment_with_substring=f"Workflow [",
|
||||
)
|
||||
if not (res1 or res2):
|
||||
print(
|
||||
"ERROR: Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed"
|
||||
)
|
||||
raise RuntimeError("Failed to set GH commit status and PR comment with Workflow Status")
|
||||
|
||||
@classmethod
|
||||
def pre_run(cls, _workflow, _job):
|
||||
result = Result.from_fs(_job.name)
|
||||
S3.copy_result_from_s3(
|
||||
Result.file_name_static(_workflow.name),
|
||||
)
|
||||
workflow_result = Result.from_fs(_workflow.name)
|
||||
workflow_result.update_sub_result(result)
|
||||
S3.copy_result_to_s3(
|
||||
workflow_result,
|
||||
unlock=True,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def run(cls, _workflow, _job):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def post_run(cls, _workflow, _job, info_errors):
|
||||
result = Result.from_fs(_job.name)
|
||||
env = _Environment.get()
|
||||
S3.copy_result_from_s3(
|
||||
Result.file_name_static(_workflow.name),
|
||||
lock=True,
|
||||
)
|
||||
workflow_result = Result.from_fs(_workflow.name)
|
||||
print(f"Workflow info [{workflow_result.info}], info_errors [{info_errors}]")
|
||||
|
||||
env_info = env.REPORT_INFO
|
||||
if env_info:
|
||||
print(
|
||||
f"WARNING: some info lines are set in Environment - append to report [{env_info}]"
|
||||
)
|
||||
info_errors += env_info
|
||||
if info_errors:
|
||||
info_errors = [f" | {error}" for error in info_errors]
|
||||
info_str = f"{_job.name}:\n"
|
||||
info_str += "\n".join(info_errors)
|
||||
print("Update workflow results with new info")
|
||||
workflow_result.set_info(info_str)
|
||||
|
||||
old_status = workflow_result.status
|
||||
|
||||
S3.upload_result_files_to_s3(result)
|
||||
workflow_result.update_sub_result(result)
|
||||
|
||||
skipped_job_results = []
|
||||
if not result.is_ok():
|
||||
print(
|
||||
"Current job failed - find dependee jobs in the workflow and set their statuses to skipped"
|
||||
)
|
||||
workflow_config_parsed = WorkflowConfigParser(_workflow).parse()
|
||||
for dependee_job in workflow_config_parsed.workflow_yaml_config.jobs:
|
||||
if _job.name in dependee_job.needs:
|
||||
if _workflow.get_job(dependee_job.name).run_unless_cancelled:
|
||||
continue
|
||||
print(
|
||||
f"NOTE: Set job [{dependee_job.name}] status to [{Result.Status.SKIPPED}] due to current failure"
|
||||
)
|
||||
skipped_job_results.append(
|
||||
Result(
|
||||
name=dependee_job.name,
|
||||
status=Result.Status.SKIPPED,
|
||||
info=ResultInfo.SKIPPED_DUE_TO_PREVIOUS_FAILURE
|
||||
+ f" [{_job.name}]",
|
||||
)
|
||||
)
|
||||
for skipped_job_result in skipped_job_results:
|
||||
workflow_result.update_sub_result(skipped_job_result)
|
||||
|
||||
S3.copy_result_to_s3(
|
||||
workflow_result,
|
||||
unlock=True,
|
||||
)
|
||||
if workflow_result.status != old_status:
|
||||
print(
|
||||
f"Update GH commit status [{result.name}]: [{old_status} -> {workflow_result.status}], link [{workflow_result.html_link}]"
|
||||
)
|
||||
GH.post_commit_status(
|
||||
name=workflow_result.name,
|
||||
status=GH.convert_to_gh_status(workflow_result.status),
|
||||
description="",
|
||||
url=workflow_result.html_link,
|
||||
)
|
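The report URL assembled in `configure` above reduces to a bucket-to-endpoint substitution on the S3 path of the HTML page plus a few query parameters. A small sketch with made-up values standing in for `Settings` and the environment:

import urllib.parse
from pathlib import Path

HTML_S3_PATH = "my-ci-bucket/report"  # hypothetical settings values
S3_BUCKET_TO_HTTP_ENDPOINT = {"my-ci-bucket": "my-ci-bucket.s3.amazonaws.com"}
HTML_PAGE_FILE = "./praktika/json.html"
PR_NUMBER, WORKFLOW_NAME = 15, "PR"

page_url = "/".join(["https:/", HTML_S3_PATH, str(Path(HTML_PAGE_FILE).name)])
for bucket, endpoint in S3_BUCKET_TO_HTTP_ENDPOINT.items():
    page_url = page_url.replace(bucket, endpoint)
page_url += f"?PR={PR_NUMBER}&sha=latest&name_0={urllib.parse.quote(WORKFLOW_NAME, safe='')}"
print(page_url)
# https://my-ci-bucket.s3.amazonaws.com/report/json.html?PR=15&sha=latest&name_0=PR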
43
praktika/hook_interface.py
Normal file
@@ -0,0 +1,43 @@
from abc import ABC, abstractmethod

from praktika import Workflow


class HookInterface(ABC):
    @abstractmethod
    def pre_run(self, _workflow, _job):
        """
        runs in pre-run step
        :param _workflow:
        :param _job:
        :return:
        """
        pass

    @abstractmethod
    def run(self, _workflow, _job):
        """
        runs in run step
        :param _workflow:
        :param _job:
        :return:
        """
        pass

    @abstractmethod
    def post_run(self, _workflow, _job):
        """
        runs in post-run step
        :param _workflow:
        :param _job:
        :return:
        """
        pass

    @abstractmethod
    def configure(self, _workflow: Workflow.Config):
        """
        runs in initial WorkflowConfig job in run step
        :return:
        """
        pass
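`HookInterface` documents the lifecycle that runner hooks follow (the built-in `CacheRunnerHooks` and `HtmlRunnerHooks` above expose the same method names as classmethods rather than subclassing it). A minimal, hypothetical implementation for illustration only:

from praktika.hook_interface import HookInterface


class PrintHook(HookInterface):
    """Toy hook that only logs the lifecycle calls."""

    def configure(self, _workflow):
        print(f"configure: {_workflow.name}")

    def pre_run(self, _workflow, _job):
        print(f"pre_run: {_job.name}")

    def run(self, _workflow, _job):
        print(f"run: {_job.name}")

    def post_run(self, _workflow, _job):
        print(f"post_run: {_job.name}")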
10
praktika/html_prepare.py
Normal file
@@ -0,0 +1,10 @@
from praktika.s3 import S3
from praktika.settings import Settings


class Html:
    @classmethod
    def prepare(cls):
        S3.copy_file_to_s3(
            s3_path=Settings.HTML_S3_PATH, local_path=Settings.HTML_PAGE_FILE
        )
102
praktika/job.py
Normal file
@@ -0,0 +1,102 @@
import copy
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, List, Optional
|
||||
|
||||
|
||||
class Job:
|
||||
@dataclass
|
||||
class Requirements:
|
||||
python: bool = False
|
||||
python_requirements_txt: str = ""
|
||||
|
||||
@dataclass
|
||||
class CacheDigestConfig:
|
||||
include_paths: List[str] = field(default_factory=list)
|
||||
exclude_paths: List[str] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class Config:
|
||||
# Job Name
|
||||
name: str
|
||||
|
||||
# Machine's label to run job on. For instance [ubuntu-latest] for free gh runner
|
||||
runs_on: List[str]
|
||||
|
||||
# Job Run Command
|
||||
command: str
|
||||
|
||||
# What job requires
|
||||
# May be phony or physical names
|
||||
requires: List[str] = field(default_factory=list)
|
||||
|
||||
# What job provides
|
||||
# May be phony or physical names
|
||||
provides: List[str] = field(default_factory=list)
|
||||
|
||||
job_requirements: Optional["Job.Requirements"] = None
|
||||
|
||||
timeout: int = 1 * 3600
|
||||
|
||||
digest_config: Optional["Job.CacheDigestConfig"] = None
|
||||
|
||||
run_in_docker: str = ""
|
||||
|
||||
run_unless_cancelled: bool = False
|
||||
|
||||
allow_merge_on_failure: bool = False
|
||||
|
||||
parameter: Any = None
|
||||
|
||||
def parametrize(
|
||||
self,
|
||||
parameter: Optional[List[Any]] = None,
|
||||
runs_on: Optional[List[List[str]]] = None,
|
||||
timeout: Optional[List[int]] = None,
|
||||
):
|
||||
assert (
|
||||
parameter or runs_on
|
||||
), "Either :parameter or :runs_on must be non empty list for parametrisation"
|
||||
if not parameter:
|
||||
parameter = [None] * len(runs_on)
|
||||
if not runs_on:
|
||||
runs_on = [None] * len(parameter)
|
||||
if not timeout:
|
||||
timeout = [None] * len(parameter)
|
||||
assert (
|
||||
len(parameter) == len(runs_on) == len(timeout)
|
||||
), "Parametrization lists must be of the same size"
|
||||
|
||||
res = []
|
||||
for parameter_, runs_on_, timeout_ in zip(parameter, runs_on, timeout):
|
||||
obj = copy.deepcopy(self)
|
||||
if parameter_:
|
||||
obj.parameter = parameter_
|
||||
if runs_on_:
|
||||
obj.runs_on = runs_on_
|
||||
if timeout_:
|
||||
obj.timeout = timeout_
|
||||
obj.name = obj.get_job_name_with_parameter()
|
||||
res.append(obj)
|
||||
return res
|
||||
|
||||
def get_job_name_with_parameter(self):
|
||||
name, parameter, runs_on = self.name, self.parameter, self.runs_on
|
||||
res = name
|
||||
name_params = []
|
||||
if isinstance(parameter, list) or isinstance(parameter, dict):
|
||||
name_params.append(json.dumps(parameter))
|
||||
elif parameter is not None:
|
||||
name_params.append(parameter)
|
||||
if runs_on:
|
||||
assert isinstance(runs_on, list)
|
||||
name_params.append(json.dumps(runs_on))
|
||||
if name_params:
|
||||
name_params = [str(param) for param in name_params]
|
||||
res += f" ({', '.join(name_params)})"
|
||||
|
||||
self.name = res
|
||||
return res
|
||||
|
||||
def __repr__(self):
|
||||
return self.name
|
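`parametrize` fans a single `Job.Config` out into one copy per parameter (with optional per-copy runner and timeout), storing the value on `job.parameter` and baking it, together with `runs_on`, into the job name. A minimal usage sketch; the job fields here are made up:

from praktika import Job

base = Job.Config(
    name="Stateless tests",
    runs_on=["ubuntu-latest"],
    command="python3 -m ci.run_stateless_tests",
)
jobs = base.parametrize(parameter=["batch_1", "batch_2"])
print([j.name for j in jobs])
# ['Stateless tests (batch_1, ["ubuntu-latest"])', 'Stateless tests (batch_2, ["ubuntu-latest"])']
print(jobs[0].parameter)  # batch_1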
727
praktika/json.html
Normal file
@@ -0,0 +1,727 @@
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>praktika report</title>
|
||||
<link rel="icon" href="https://w4z3pajszlbkfcw2wcylfei5km0xmwag.lambda-url.us-east-1.on.aws/" type="image/x-icon">
|
||||
<style>
|
||||
|
||||
/* Default (Day Theme) */
|
||||
:root {
|
||||
--background-color: white;
|
||||
--text-color: #000;
|
||||
--tile-background: #f9f9f9;
|
||||
--footer-background: #f1f1f1;
|
||||
--footer-text-color: #000;
|
||||
--status-width: 300px;
|
||||
}
|
||||
|
||||
body {
|
||||
background-color: var(--background-color);
|
||||
color: var(--text-color);
|
||||
height: 100%;
|
||||
margin: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: monospace, sans-serif;
|
||||
}
|
||||
|
||||
body.night-theme {
|
||||
--background-color: #1F1F1C;
|
||||
--text-color: #fff;
|
||||
--tile-background: black;
|
||||
}
|
||||
|
||||
#info-container {
|
||||
margin-left: calc(var(--status-width) + 20px);
|
||||
margin-bottom: 10px;
|
||||
background-color: var(--tile-background);
|
||||
padding: 10px;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
#status-container {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
width: var(--status-width);
|
||||
background-color: var(--tile-background);
|
||||
padding: 20px;
|
||||
box-sizing: border-box;
|
||||
text-align: left;
|
||||
font-size: 18px;
|
||||
font-weight: bold;
|
||||
margin: 0; /* Remove margin */
|
||||
}
|
||||
|
||||
#status-container button {
|
||||
display: block; /* Stack buttons vertically */
|
||||
width: 100%; /* Full width of container */
|
||||
padding: 10px;
|
||||
margin-bottom: 10px; /* Space between buttons */
|
||||
background-color: #4CAF50; /* Green background color */
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 5px;
|
||||
font-size: 16px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#status-container button:hover {
|
||||
background-color: #45a049; /* Darker green on hover */
|
||||
}
|
||||
|
||||
#result-container {
|
||||
background-color: var(--tile-background);
|
||||
margin-left: calc(var(--status-width) + 20px);
|
||||
padding: 20px;
|
||||
box-sizing: border-box;
|
||||
text-align: center;
|
||||
font-size: 18px;
|
||||
font-weight: normal;
|
||||
flex-grow: 1;
|
||||
}
|
||||
|
||||
#footer {
|
||||
padding: 10px;
|
||||
position: fixed;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
background-color: #1F1F1C;
|
||||
color: white;
|
||||
font-size: 14px;
|
||||
display: flex;
|
||||
justify-content: space-between; /* Ensure the .left expands, and .right and .settings are aligned to the right */
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#footer a {
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
#footer .left {
|
||||
flex-grow: 1; /* Takes up all the available space */
|
||||
}
|
||||
|
||||
/* make some space around '/' in the navigation line */
|
||||
#footer .left span.separator {
|
||||
margin-left: 5px;
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
#footer .right, #footer .settings {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#footer .right a::before {
|
||||
content: "#";
|
||||
margin-left: 10px;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
#footer .right::before, #footer .settings::before {
|
||||
content: "|"; /* Add separator before right and settings sections */
|
||||
margin-left: 10px;
|
||||
margin-right: 10px;
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
#theme-toggle {
|
||||
cursor: pointer;
|
||||
font-size: 20px;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#theme-toggle:hover {
|
||||
color: #e0e0e0;
|
||||
}
|
||||
|
||||
#footer a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
#links {
|
||||
margin-top: 10px;
|
||||
padding: 15px;
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 5px;
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
#links a {
|
||||
display: block;
|
||||
margin-bottom: 5px;
|
||||
padding: 5px 10px;
|
||||
background-color: #D5D5D5;
|
||||
color: black;
|
||||
text-decoration: none;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
#links a:hover {
|
||||
background-color: #D5D5D5;
|
||||
}
|
||||
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
th.name-column, td.name-column {
|
||||
max-width: 400px; /* Set the maximum width for the column */
|
||||
white-space: nowrap; /* Prevent text from wrapping */
|
||||
overflow: hidden; /* Hide the overflowed text */
|
||||
text-overflow: ellipsis; /* Show ellipsis (...) for overflowed text */
|
||||
}
|
||||
|
||||
th.status-column, td.status-column {
|
||||
max-width: 100px; /* Set the maximum width for the column */
|
||||
white-space: nowrap; /* Prevent text from wrapping */
|
||||
overflow: hidden; /* Hide the overflowed text */
|
||||
text-overflow: ellipsis; /* Show ellipsis (...) for overflowed text */
|
||||
}
|
||||
|
||||
th.time-column, td.time-column {
|
||||
max-width: 120px; /* Set the maximum width for the column */
|
||||
white-space: nowrap; /* Prevent text from wrapping */
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th.info-column, td.info-column {
|
||||
width: 100%; /* Allow the column to take all the remaining space */
|
||||
}
|
||||
|
||||
th, td {
|
||||
padding: 8px;
|
||||
border: 1px solid #ddd;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
th {
|
||||
background-color: #f4f4f4;
|
||||
}
|
||||
|
||||
.status-success {
|
||||
color: green;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-fail {
|
||||
color: red;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-pending {
|
||||
color: #d4a017;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-broken {
|
||||
color: purple;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-run {
|
||||
color: blue;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-error {
|
||||
color: darkred;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.status-other {
|
||||
color: grey;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.json-key {
|
||||
font-weight: bold;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.json-value {
|
||||
margin-left: 20px;
|
||||
}
|
||||
|
||||
.json-value a {
|
||||
color: #007bff;
|
||||
}
|
||||
|
||||
.json-value a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div id="info-container"></div>
|
||||
<div id="status-container"></div>
|
||||
<div id="result-container"></div>
|
||||
|
||||
<footer id="footer">
|
||||
<div class="left"></div>
|
||||
<div class="right"></div>
|
||||
<div class="settings">
|
||||
<span id="theme-toggle">☀️</span>
|
||||
</div>
|
||||
</footer>
|
||||
|
||||
<script>
|
||||
function toggleTheme() {
|
||||
document.body.classList.toggle('night-theme');
|
||||
const toggleIcon = document.getElementById('theme-toggle');
|
||||
if (document.body.classList.contains('night-theme')) {
|
||||
toggleIcon.textContent = '☾'; // Moon for night mode
|
||||
} else {
|
||||
toggleIcon.textContent = '☀️'; // Sun for day mode
|
||||
}
|
||||
}
|
||||
|
||||
// Attach the toggle function to the click event of the icon
|
||||
document.getElementById('theme-toggle').addEventListener('click', toggleTheme);
|
||||
|
||||
// Function to format timestamp to "DD-mmm-YYYY HH:MM:SS.MM"
|
||||
function formatTimestamp(timestamp, showDate = true) {
|
||||
const date = new Date(timestamp * 1000);
|
||||
const day = String(date.getDate()).padStart(2, '0');
|
||||
const monthNames = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
|
||||
const month = monthNames[date.getMonth()];
|
||||
const year = date.getFullYear();
|
||||
const hours = String(date.getHours()).padStart(2, '0');
|
||||
const minutes = String(date.getMinutes()).padStart(2, '0');
|
||||
const seconds = String(date.getSeconds()).padStart(2, '0');
|
||||
//const milliseconds = String(date.getMilliseconds()).padStart(2, '0');
|
||||
|
||||
return showDate
|
||||
? `${day}-${month}-${year} ${hours}:${minutes}:${seconds}`
|
||||
: `${hours}:${minutes}:${seconds}`;
|
||||
}
|
||||
|
||||
// Function to determine status class based on value
|
||||
function getStatusClass(status) {
|
||||
const lowerStatus = status.toLowerCase();
|
||||
if (lowerStatus.includes('success') || lowerStatus === 'ok') return 'status-success';
|
||||
if (lowerStatus.includes('fail')) return 'status-fail';
|
||||
if (lowerStatus.includes('pending')) return 'status-pending';
|
||||
if (lowerStatus.includes('broken')) return 'status-broken';
|
||||
if (lowerStatus.includes('run')) return 'status-run';
|
||||
if (lowerStatus.includes('error')) return 'status-error';
|
||||
return 'status-other';
|
||||
}
|
||||
|
||||
// Function to format duration from seconds to "HH:MM:SS"
|
||||
function formatDuration(durationInSeconds) {
|
||||
// Check if the duration is empty, null, or not a number
|
||||
if (!durationInSeconds || isNaN(durationInSeconds)) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Ensure duration is a floating-point number
|
||||
const duration = parseFloat(durationInSeconds);
|
||||
|
||||
// Calculate seconds and milliseconds
|
||||
const seconds = Math.floor(duration); // Whole seconds
|
||||
const milliseconds = Math.floor((duration % 1) * 1000); // Convert fraction to milliseconds
|
||||
|
||||
// Format seconds and milliseconds with leading zeros where needed
|
||||
const formattedSeconds = String(seconds);
|
||||
const formattedMilliseconds = String(milliseconds).padStart(3, '0');
|
||||
|
||||
// Return the formatted duration as seconds.milliseconds
|
||||
return `${formattedSeconds}.${formattedMilliseconds}`;
|
||||
}
|
||||
|
||||
function addKeyValueToStatus(key, value) {
|
||||
|
||||
const statusContainer = document.getElementById('status-container');
|
||||
|
||||
const keyElement = document.createElement('div');
|
||||
keyElement.className = 'json-key';
|
||||
keyElement.textContent = key + ':';
|
||||
|
||||
const valueElement = document.createElement('div');
|
||||
valueElement.className = 'json-value';
|
||||
valueElement.textContent = value;
|
||||
|
||||
statusContainer.appendChild(keyElement);
|
||||
statusContainer.appendChild(valueElement);
|
||||
}
|
||||
|
||||
function addFileButtonToStatus(key, links) {
|
||||
|
||||
if (links == null) {
|
||||
return
|
||||
}
|
||||
|
||||
const statusContainer = document.getElementById('status-container');
|
||||
|
||||
const keyElement = document.createElement('div');
|
||||
keyElement.className = 'json-key';
|
||||
keyElement.textContent = key + ':';
|
||||
statusContainer.appendChild(keyElement);
|
||||
|
||||
if (Array.isArray(links) && links.length > 0) {
|
||||
links.forEach(link => {
|
||||
// const a = document.createElement('a');
|
||||
// a.href = link;
|
||||
// a.textContent = link.split('/').pop();
|
||||
// a.target = '_blank';
|
||||
// statusContainer.appendChild(a);
|
||||
const button = document.createElement('button');
|
||||
button.textContent = link.split('/').pop();
|
||||
button.addEventListener('click', function () {
|
||||
window.location.href = link;
|
||||
});
|
||||
statusContainer.appendChild(button);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function addStatusToStatus(status, start_time, duration) {
|
||||
const statusContainer = document.getElementById('status-container');
|
||||
|
||||
let keyElement = document.createElement('div');
|
||||
let valueElement = document.createElement('div');
|
||||
keyElement.className = 'json-key';
|
||||
valueElement.className = 'json-value';
|
||||
keyElement.textContent = 'status:';
|
||||
valueElement.classList.add('status-value');
|
||||
valueElement.classList.add(getStatusClass(status));
|
||||
valueElement.textContent = status;
|
||||
statusContainer.appendChild(keyElement);
|
||||
statusContainer.appendChild(valueElement);
|
||||
|
||||
keyElement = document.createElement('div');
|
||||
valueElement = document.createElement('div');
|
||||
keyElement.className = 'json-key';
|
||||
valueElement.className = 'json-value';
|
||||
keyElement.textContent = 'start_time:';
|
||||
valueElement.textContent = formatTimestamp(start_time);
|
||||
statusContainer.appendChild(keyElement);
|
||||
statusContainer.appendChild(valueElement);
|
||||
|
||||
keyElement = document.createElement('div');
|
||||
valueElement = document.createElement('div');
|
||||
keyElement.className = 'json-key';
|
||||
valueElement.className = 'json-value';
|
||||
keyElement.textContent = 'duration:';
|
||||
if (duration === null) {
|
||||
// Set initial value to 0 and add a unique ID or data attribute to identify the duration element
|
||||
valueElement.textContent = '00:00:00';
|
||||
valueElement.setAttribute('id', 'duration-value');
|
||||
} else {
|
||||
// Format the duration if it's a valid number
|
||||
valueElement.textContent = formatDuration(duration);
|
||||
}
|
||||
statusContainer.appendChild(keyElement);
|
||||
statusContainer.appendChild(valueElement);
|
||||
}
|
||||
|
||||
function navigatePath(jsonObj, nameArray) {
|
||||
let baseParams = new URLSearchParams(window.location.search);
|
||||
let keysToDelete = [];
|
||||
baseParams.forEach((value, key) => {
|
||||
if (key.startsWith('name_')) {
|
||||
keysToDelete.push(key); // Collect the keys to delete
|
||||
}
|
||||
});
|
||||
keysToDelete.forEach((key) => baseParams.delete(key));
|
||||
let pathNames = [];
|
||||
let pathLinks = [];
|
||||
let currentObj = jsonObj;
|
||||
|
||||
// Add the first entry (root level)
|
||||
baseParams.set(`name_0`, currentObj.name);
|
||||
pathNames.push(currentObj.name);
|
||||
pathLinks.push(`<span class="separator">/</span><a href="${window.location.pathname}?${baseParams.toString()}">${currentObj.name}</a>`);
|
||||
// Iterate through the nameArray starting at index 0
|
||||
for (const [index, name] of nameArray.entries()) {
|
||||
if (index === 0) continue;
|
||||
if (currentObj && Array.isArray(currentObj.results)) {
|
||||
const nextResult = currentObj.results.find(result => result.name === name);
|
||||
if (nextResult) {
|
||||
baseParams.set(`name_${index}`, nextResult.name);
|
||||
pathNames.push(nextResult.name); // Correctly push nextResult name, not currentObj.name
|
||||
pathLinks.push(`<span class="separator">/</span><a href="${window.location.pathname}?${baseParams.toString()}">${nextResult.name}</a>`);
|
||||
currentObj = nextResult; // Move to the next object in the hierarchy
|
||||
} else {
|
||||
console.error(`Name "${name}" not found in results array.`);
|
||||
return null; // Name not found in results array
|
||||
}
|
||||
} else {
|
||||
console.error(`Current object is not structured as expected.`);
|
||||
return null; // Current object is not structured as expected
|
||||
}
|
||||
}
|
||||
const footerLeft = document.querySelector('#footer .left');
|
||||
footerLeft.innerHTML = pathLinks.join('');
|
||||
|
||||
return currentObj;
|
||||
}
|
||||
|
||||
// Define the fixed columns globally, so both functions can use it
|
||||
const columns = ['name', 'status', 'start_time', 'duration', 'info'];
|
||||
|
||||
const columnSymbols = {
|
||||
name: '👤',
|
||||
status: '✔️',
|
||||
start_time: '🕒',
|
||||
duration: '⏳',
|
||||
info: '⚠️'
|
||||
};
|
||||
|
||||
function createResultsTable(results, nest_level) {
|
||||
if (results && Array.isArray(results) && results.length > 0) {
|
||||
const table = document.createElement('table');
|
||||
const thead = document.createElement('thead');
|
||||
const tbody = document.createElement('tbody');
|
||||
|
||||
// Get the current URL parameters
|
||||
const currentUrl = new URL(window.location.href);
|
||||
|
||||
// Create table headers based on the fixed columns
|
||||
const headerRow = document.createElement('tr');
|
||||
columns.forEach(column => {
|
||||
const th = document.createElement('th');
|
||||
th.textContent = columnSymbols[column] || column;
|
||||
th.style.cursor = 'pointer'; // Make headers clickable
|
||||
th.addEventListener('click', () => sortTable(results, column, tbody, nest_level)); // Add click event to sort the table
|
||||
headerRow.appendChild(th);
|
||||
});
|
||||
thead.appendChild(headerRow);
|
||||
|
||||
// Create table rows
|
||||
populateTableRows(tbody, results, columns, nest_level);
|
||||
|
||||
table.appendChild(thead);
|
||||
table.appendChild(tbody);
|
||||
|
||||
return table;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function populateTableRows(tbody, results, columns, nest_level) {
|
||||
const currentUrl = new URL(window.location.href); // Get the current URL
|
||||
|
||||
// Clear existing rows if re-rendering (used in sorting)
|
||||
tbody.innerHTML = '';
|
||||
|
||||
results.forEach((result, index) => {
|
||||
const row = document.createElement('tr');
|
||||
|
||||
columns.forEach(column => {
|
||||
const td = document.createElement('td');
|
||||
const value = result[column];
|
||||
|
||||
if (column === 'name') {
|
||||
// Create a link for the name field, using name_X
|
||||
const link = document.createElement('a');
|
||||
const newUrl = new URL(currentUrl); // Create a fresh copy of the URL for each row
|
||||
newUrl.searchParams.set(`name_${nest_level}`, value); // Use backticks for string interpolation
|
||||
link.href = newUrl.toString();
|
||||
link.textContent = value;
|
||||
td.classList.add('name-column');
|
||||
td.appendChild(link);
|
||||
} else if (column === 'status') {
|
||||
// Apply status formatting
|
||||
const span = document.createElement('span');
|
||||
span.className = getStatusClass(value);
|
||||
span.textContent = value;
|
||||
td.classList.add('status-column');
|
||||
td.appendChild(span);
|
||||
} else if (column === 'start_time') {
|
||||
td.classList.add('time-column');
|
||||
td.textContent = value ? formatTimestamp(value, false) : '';
|
||||
} else if (column === 'duration') {
|
||||
td.classList.add('time-column');
|
||||
td.textContent = value ? formatDuration(value) : '';
|
||||
} else if (column === 'info') {
|
||||
// For info and other columns, just display the value
|
||||
td.textContent = value || '';
|
||||
td.classList.add('info-column');
|
||||
}
|
||||
|
||||
row.appendChild(td);
|
||||
});
|
||||
|
||||
tbody.appendChild(row);
|
||||
});
|
||||
}
|
||||
|
||||
function sortTable(results, key, tbody, nest_level) {
|
||||
// Find the table header element for the given key
|
||||
let th = null;
|
||||
const tableHeaders = document.querySelectorAll('th'); // Select all table headers
|
||||
tableHeaders.forEach(header => {
|
||||
if (header.textContent.trim().toLowerCase() === key.toLowerCase()) {
|
||||
th = header;
|
||||
}
|
||||
});
|
||||
|
||||
if (!th) {
|
||||
console.error(`No table header found for key: ${key}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Determine the current sort direction
|
||||
let ascending = th.getAttribute('data-sort-direction') === 'asc' ? false : true;
|
||||
|
||||
// Toggle the sort direction for the next click
|
||||
th.setAttribute('data-sort-direction', ascending ? 'asc' : 'desc');
|
||||
|
||||
// Sort the results array by the given key
|
||||
results.sort((a, b) => {
|
||||
if (a[key] < b[key]) return ascending ? -1 : 1;
|
||||
if (a[key] > b[key]) return ascending ? 1 : -1;
|
||||
return 0;
|
||||
});
|
||||
|
||||
// Re-populate the table with sorted data
|
||||
populateTableRows(tbody, results, columns, nest_level);
|
||||
}
|
||||
|
||||
function loadJSON(PR, sha, nameParams) {
|
||||
const infoElement = document.getElementById('info-container');
|
||||
let lastModifiedTime = null;
|
||||
const task = nameParams[0].toLowerCase();
|
||||
|
||||
// Construct the URL dynamically based on PR, sha, and name_X
|
||||
const baseUrl = window.location.origin + window.location.pathname.replace('/json.html', '');
|
||||
const path = `${baseUrl}/${encodeURIComponent(PR)}/${encodeURIComponent(sha)}/result_${task}.json`;
|
||||
|
||||
fetch(path, {cache: "no-cache"})
|
||||
.then(response => {
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP error! status: ${response.status}`);
|
||||
}
|
||||
lastModifiedTime = response.headers.get('Last-Modified');
|
||||
return response.json();
|
||||
})
|
||||
.then(data => {
|
||||
const linksDiv = document.getElementById('links');
|
||||
const resultsDiv = document.getElementById('result-container');
|
||||
const footerRight = document.querySelector('#footer .right');
|
||||
|
||||
let targetData = navigatePath(data, nameParams);
|
||||
let nest_level = nameParams.length;
|
||||
|
||||
if (targetData) {
|
||||
infoElement.style.display = 'none';
|
||||
|
||||
// Handle footer links if present
|
||||
if (Array.isArray(data.aux_links) && data.aux_links.length > 0) {
|
||||
data.aux_links.forEach(link => {
|
||||
const a = document.createElement('a');
|
||||
a.href = link;
|
||||
a.textContent = link.split('/').pop();
|
||||
a.target = '_blank';
|
||||
footerRight.appendChild(a);
|
||||
});
|
||||
}
|
||||
addStatusToStatus(targetData.status, targetData.start_time, targetData.duration)
|
||||
|
||||
// Handle links
|
||||
addFileButtonToStatus('files', targetData.links)
|
||||
|
||||
|
||||
// Handle duration update if duration is null and start_time exists
|
||||
if (targetData.duration === null && targetData.start_time) {
|
||||
let duration = Math.floor(Date.now() / 1000 - targetData.start_time);
|
||||
const durationElement = document.getElementById('duration-value');
|
||||
|
||||
const intervalId = setInterval(() => {
|
||||
duration++;
|
||||
durationElement.textContent = formatDuration(duration);
|
||||
}, 1000);
|
||||
}
|
||||
|
||||
// If 'results' exists and is non-empty, create the table
|
||||
const resultsData = targetData.results;
|
||||
if (Array.isArray(resultsData) && resultsData.length > 0) {
|
||||
const table = createResultsTable(resultsData, nest_level);
|
||||
if (table) {
|
||||
resultsDiv.appendChild(table);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
infoElement.textContent = 'Object Not Found';
|
||||
infoElement.style.display = 'block';
|
||||
}
|
||||
|
||||
// Set up auto-reload if Last-Modified header is present
|
||||
if (lastModifiedTime) {
|
||||
setInterval(() => {
|
||||
checkForUpdate(path, lastModifiedTime);
|
||||
}, 30000); // 30000 milliseconds = 30 seconds
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error loading JSON:', error);
|
||||
infoElement.textContent = 'Error loading data';
|
||||
infoElement.style.display = 'block';
|
||||
});
|
||||
}
|
||||
|
||||
// Function to check if the JSON file is updated
|
||||
function checkForUpdate(path, lastModifiedTime) {
|
||||
fetch(path, {method: 'HEAD'})
|
||||
.then(response => {
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP error! status: ${response.status}`);
|
||||
}
|
||||
const newLastModifiedTime = response.headers.get('Last-Modified');
|
||||
if (newLastModifiedTime && new Date(newLastModifiedTime) > new Date(lastModifiedTime)) {
|
||||
// If the JSON file has been updated, reload the page
|
||||
window.location.reload();
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
console.error('Error checking for update:', error);
|
||||
});
|
||||
}
|
||||
|
||||
// Initialize the page and load JSON from URL parameter
|
||||
function init() {
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
const PR = urlParams.get('PR');
|
||||
const sha = urlParams.get('sha');
|
||||
const root_name = urlParams.get('name_0');
|
||||
const nameParams = [];
|
||||
|
||||
urlParams.forEach((value, key) => {
|
||||
if (key.startsWith('name_')) {
|
||||
const index = parseInt(key.split('_')[1], 10);
|
||||
nameParams[index] = value;
|
||||
}
|
||||
});
|
||||
|
||||
if (PR) {
|
||||
addKeyValueToStatus("PR", PR)
|
||||
} else {
|
||||
console.error("TODO")
|
||||
}
|
||||
addKeyValueToStatus("sha", sha);
|
||||
if (nameParams[1]) {
|
||||
addKeyValueToStatus("job", nameParams[1]);
|
||||
}
|
||||
addKeyValueToStatus("workflow", nameParams[0]);
|
||||
|
||||
if (PR && sha && root_name) {
|
||||
loadJSON(PR, sha, nameParams);
|
||||
} else {
|
||||
document.getElementById('info-container').textContent = 'Error: Missing required URL parameters: PR, sha, or name_0';
|
||||
}
|
||||
}
|
||||
|
||||
window.onload = init;
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
137
praktika/mangle.py
Normal file
@@ -0,0 +1,137 @@
import copy
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from praktika import Job
|
||||
from praktika._settings import _USER_DEFINED_SETTINGS, _Settings
|
||||
from praktika.utils import ContextManager, Utils
|
||||
|
||||
|
||||
def _get_workflows(name=None, file=None):
|
||||
"""
|
||||
Gets user's workflow configs
|
||||
"""
|
||||
res = []
|
||||
|
||||
with ContextManager.cd():
|
||||
directory = Path(_Settings.WORKFLOWS_DIRECTORY)
|
||||
for py_file in directory.glob("*.py"):
|
||||
if file and file not in str(py_file):
|
||||
continue
|
||||
module_name = py_file.name.removeprefix(".py")
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, f"{_Settings.WORKFLOWS_DIRECTORY}/{module_name}"
|
||||
)
|
||||
assert spec
|
||||
foo = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader
|
||||
spec.loader.exec_module(foo)
|
||||
try:
|
||||
for workflow in foo.WORKFLOWS:
|
||||
if name:
|
||||
if name == workflow.name:
|
||||
print(f"Read workflow [{name}] config from [{module_name}]")
|
||||
res = [workflow]
|
||||
break
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
res += foo.WORKFLOWS
|
||||
print(f"Read workflow configs from [{module_name}]")
|
||||
except Exception as e:
|
||||
print(
|
||||
f"WARNING: Failed to add WORKFLOWS config from [{module_name}], exception [{e}]"
|
||||
)
|
||||
if not res:
|
||||
Utils.raise_with_error(f"Failed to find workflow [{name or file}]")
|
||||
|
||||
for workflow in res:
|
||||
# add native jobs
|
||||
_update_workflow_with_native_jobs(workflow)
|
||||
# fill in artifact properties, e.g. _provided_by
|
||||
_update_workflow_artifacts(workflow)
|
||||
return res
|
||||
|
||||
|
||||
def _update_workflow_artifacts(workflow):
|
||||
artifact_job = {}
|
||||
for job in workflow.jobs:
|
||||
for artifact_name in job.provides:
|
||||
assert artifact_name not in artifact_job
|
||||
artifact_job[artifact_name] = job.name
|
||||
for artifact in workflow.artifacts:
|
||||
artifact._provided_by = artifact_job[artifact.name]
|
||||
|
||||
|
||||
def _update_workflow_with_native_jobs(workflow):
|
||||
if workflow.dockers:
|
||||
from praktika.native_jobs import _docker_build_job
|
||||
|
||||
print(f"Enable native job [{_docker_build_job.name}] for [{workflow.name}]")
|
||||
aux_job = copy.deepcopy(_docker_build_job)
|
||||
if workflow.enable_cache:
|
||||
print(
|
||||
f"Add automatic digest config for [{aux_job.name}] job since cache is enabled"
|
||||
)
|
||||
docker_digest_config = Job.CacheDigestConfig()
|
||||
for docker_config in workflow.dockers:
|
||||
docker_digest_config.include_paths.append(docker_config.path)
|
||||
aux_job.digest_config = docker_digest_config
|
||||
|
||||
workflow.jobs.insert(0, aux_job)
|
||||
for job in workflow.jobs[1:]:
|
||||
if not job.requires:
|
||||
job.requires = []
|
||||
job.requires.append(aux_job.name)
|
||||
|
||||
if (
|
||||
workflow.enable_cache
|
||||
or workflow.enable_report
|
||||
or workflow.enable_merge_ready_status
|
||||
):
|
||||
from praktika.native_jobs import _workflow_config_job
|
||||
|
||||
print(f"Enable native job [{_workflow_config_job.name}] for [{workflow.name}]")
|
||||
aux_job = copy.deepcopy(_workflow_config_job)
|
||||
workflow.jobs.insert(0, aux_job)
|
||||
for job in workflow.jobs[1:]:
|
||||
if not job.requires:
|
||||
job.requires = []
|
||||
job.requires.append(aux_job.name)
|
||||
|
||||
if workflow.enable_merge_ready_status:
|
||||
from praktika.native_jobs import _final_job
|
||||
|
||||
print(f"Enable native job [{_final_job.name}] for [{workflow.name}]")
|
||||
aux_job = copy.deepcopy(_final_job)
|
||||
for job in workflow.jobs:
|
||||
aux_job.requires.append(job.name)
|
||||
workflow.jobs.append(aux_job)
|
||||
|
||||
|
||||
def _get_user_settings() -> Dict[str, Any]:
|
||||
"""
|
||||
Gets user's settings
|
||||
"""
|
||||
res = {} # type: Dict[str, Any]
|
||||
|
||||
directory = Path(_Settings.SETTINGS_DIRECTORY)
|
||||
for py_file in directory.glob("*.py"):
|
||||
module_name = py_file.name.removeprefix(".py")
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, f"{_Settings.SETTINGS_DIRECTORY}/{module_name}"
|
||||
)
|
||||
assert spec
|
||||
foo = importlib.util.module_from_spec(spec)
|
||||
assert spec.loader
|
||||
spec.loader.exec_module(foo)
|
||||
for setting in _USER_DEFINED_SETTINGS:
|
||||
try:
|
||||
value = getattr(foo, setting)
|
||||
res[setting] = value
|
||||
print(f"Apply user defined setting [{setting} = {value}]")
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
return res
|
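`_update_workflow_with_native_jobs` above always prepends the native job and rewires dependencies so that every pre-existing job requires it. A toy model of that insertion step (plain dicts instead of `Job.Config`, job names illustrative only):

jobs = [{"name": "Build", "requires": []}, {"name": "Test", "requires": ["Build"]}]
aux_job = {"name": "Config Workflow", "requires": []}

# prepend the native job and make every other job depend on it
jobs.insert(0, aux_job)
for job in jobs[1:]:
    job["requires"].append(aux_job["name"])

assert [j["name"] for j in jobs] == ["Config Workflow", "Build", "Test"]
assert jobs[2]["requires"] == ["Build", "Config Workflow"]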
378
praktika/native_jobs.py
Normal file
@@ -0,0 +1,378 @@
import sys
|
||||
from typing import Dict
|
||||
|
||||
from praktika import Job, Workflow
|
||||
from praktika._environment import _Environment
|
||||
from praktika.cidb import CIDB
|
||||
from praktika.digest import Digest
|
||||
from praktika.docker import Docker
|
||||
from praktika.gh import GH
|
||||
from praktika.hook_cache import CacheRunnerHooks
|
||||
from praktika.hook_html import HtmlRunnerHooks
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.result import Result, ResultInfo
|
||||
from praktika.runtime import RunConfig
|
||||
from praktika.s3 import S3
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Shell, Utils
|
||||
|
||||
assert Settings.CI_CONFIG_RUNS_ON
|
||||
|
||||
_workflow_config_job = Job.Config(
|
||||
name=Settings.CI_CONFIG_JOB_NAME,
|
||||
runs_on=Settings.CI_CONFIG_RUNS_ON,
|
||||
job_requirements=(
|
||||
Job.Requirements(
|
||||
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
|
||||
python_requirements_txt=Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS,
|
||||
)
|
||||
if Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS
|
||||
else None
|
||||
),
|
||||
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.CI_CONFIG_JOB_NAME}'",
|
||||
)
|
||||
|
||||
_docker_build_job = Job.Config(
|
||||
name=Settings.DOCKER_BUILD_JOB_NAME,
|
||||
runs_on=Settings.DOCKER_BUILD_RUNS_ON,
|
||||
job_requirements=Job.Requirements(
|
||||
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
|
||||
python_requirements_txt="",
|
||||
),
|
||||
timeout=4 * 3600,
|
||||
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.DOCKER_BUILD_JOB_NAME}'",
|
||||
)
|
||||
|
||||
_final_job = Job.Config(
|
||||
name=Settings.FINISH_WORKFLOW_JOB_NAME,
|
||||
runs_on=Settings.CI_CONFIG_RUNS_ON,
|
||||
job_requirements=Job.Requirements(
|
||||
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS,
|
||||
python_requirements_txt="",
|
||||
),
|
||||
command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.FINISH_WORKFLOW_JOB_NAME}'",
|
||||
run_unless_cancelled=True,
|
||||
)
|
||||
|
||||
|
||||
def _build_dockers(workflow, job_name):
|
||||
print(f"Start [{job_name}], workflow [{workflow.name}]")
|
||||
dockers = workflow.dockers
|
||||
ready = []
|
||||
results = []
|
||||
job_status = Result.Status.SUCCESS
|
||||
job_info = ""
|
||||
dockers = Docker.sort_in_build_order(dockers)
|
||||
docker_digests = {} # type: Dict[str, str]
|
||||
for docker in dockers:
|
||||
docker_digests[docker.name] = Digest().calc_docker_digest(docker, dockers)
|
||||
|
||||
if not Shell.check(
|
||||
"docker buildx inspect --bootstrap | grep -q docker-container", verbose=True
|
||||
):
|
||||
print("Install docker container driver")
|
||||
if not Shell.check(
|
||||
"docker buildx create --use --name mybuilder --driver docker-container",
|
||||
verbose=True,
|
||||
):
|
||||
job_status = Result.Status.FAILED
|
||||
job_info = "Failed to install docker buildx driver"
|
||||
|
||||
if job_status == Result.Status.SUCCESS:
|
||||
if not Docker.login(
|
||||
Settings.DOCKERHUB_USERNAME,
|
||||
user_password=workflow.get_secret(Settings.DOCKERHUB_SECRET).get_value(),
|
||||
):
|
||||
job_status = Result.Status.FAILED
|
||||
job_info = "Failed to login to dockerhub"
|
||||
|
||||
if job_status == Result.Status.SUCCESS:
|
||||
for docker in dockers:
|
||||
assert (
|
||||
docker.name not in ready
|
||||
), f"All docker names must be uniq [{dockers}]"
|
||||
stopwatch = Utils.Stopwatch()
|
||||
info = f"{docker.name}:{docker_digests[docker.name]}"
|
||||
log_file = f"{Settings.OUTPUT_DIR}/docker_{Utils.normalize_string(docker.name)}.log"
|
||||
files = []
|
||||
|
||||
code, out, err = Shell.get_res_stdout_stderr(
|
||||
f"docker manifest inspect {docker.name}:{docker_digests[docker.name]}"
|
||||
)
|
||||
print(
|
||||
f"Docker inspect results for {docker.name}:{docker_digests[docker.name]}: exit code [{code}], out [{out}], err [{err}]"
|
||||
)
|
||||
if "no such manifest" in err:
|
||||
ret_code = Docker.build(
|
||||
docker, log_file=log_file, digests=docker_digests, add_latest=False
|
||||
)
|
||||
if ret_code == 0:
|
||||
status = Result.Status.SUCCESS
|
||||
else:
|
||||
status = Result.Status.FAILED
|
||||
job_status = Result.Status.FAILED
|
||||
info += f", failed with exit code: {ret_code}, see log"
|
||||
files.append(log_file)
|
||||
else:
|
||||
print(
|
||||
f"Docker image [{docker.name}:{docker_digests[docker.name]} exists - skip build"
|
||||
)
|
||||
status = Result.Status.SKIPPED
|
||||
ready.append(docker.name)
|
||||
results.append(
|
||||
Result(
|
||||
name=docker.name,
|
||||
status=status,
|
||||
info=info,
|
||||
duration=stopwatch.duration,
|
||||
start_time=stopwatch.start_time,
|
||||
files=files,
|
||||
)
|
||||
)
|
||||
Result.from_fs(job_name).set_status(job_status).set_results(results).set_info(
|
||||
job_info
|
||||
)
|
||||
|
||||
if job_status != Result.Status.SUCCESS:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _config_workflow(workflow: Workflow.Config, job_name):
|
||||
def _check_yaml_up_to_date():
|
||||
print("Check workflows are up to date")
|
||||
stop_watch = Utils.Stopwatch()
|
||||
exit_code, output, err = Shell.get_res_stdout_stderr(
|
||||
f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}"
|
||||
)
|
||||
info = ""
|
||||
status = Result.Status.SUCCESS
|
||||
if exit_code != 0:
|
||||
info = f"workspace has uncommitted files unexpectedly [{output}]"
|
||||
status = Result.Status.ERROR
|
||||
print("ERROR: ", info)
|
||||
else:
|
||||
Shell.check(f"{Settings.PYTHON_INTERPRETER} -m praktika --generate")
|
||||
exit_code, output, err = Shell.get_res_stdout_stderr(
|
||||
f"git diff-index HEAD -- {Settings.WORKFLOW_PATH_PREFIX}"
|
||||
)
|
||||
if exit_code != 0:
|
||||
info = f"workspace has outdated workflows [{output}] - regenerate with [python -m praktika --generate]"
|
||||
status = Result.Status.ERROR
|
||||
print("ERROR: ", info)
|
||||
|
||||
return (
|
||||
Result(
|
||||
name="Check Workflows updated",
|
||||
status=status,
|
||||
start_time=stop_watch.start_time,
|
||||
duration=stop_watch.duration,
|
||||
info=info,
|
||||
),
|
||||
info,
|
||||
)
|
||||
|
||||
def _check_secrets(secrets):
|
||||
print("Check Secrets")
|
||||
stop_watch = Utils.Stopwatch()
|
||||
infos = []
|
||||
for secret_config in secrets:
|
||||
value = secret_config.get_value()
|
||||
if not value:
|
||||
info = f"ERROR: Failed to read secret [{secret_config.name}]"
|
||||
infos.append(info)
|
||||
print(info)
|
||||
|
||||
info = "\n".join(infos)
|
||||
return (
|
||||
Result(
|
||||
name="Check Secrets",
|
||||
status=(Result.Status.FAILED if infos else Result.Status.SUCCESS),
|
||||
start_time=stop_watch.start_time,
|
||||
duration=stop_watch.duration,
|
||||
info=info,
|
||||
),
|
||||
info,
|
||||
)
|
||||
|
||||
def _check_db(workflow):
|
||||
stop_watch = Utils.Stopwatch()
|
||||
res, info = CIDB(
|
||||
workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(),
|
||||
workflow.get_secret(Settings.SECRET_CI_DB_PASSWORD).get_value(),
|
||||
).check()
|
||||
return (
|
||||
Result(
|
||||
name="Check CI DB",
|
||||
status=(Result.Status.FAILED if not res else Result.Status.SUCCESS),
|
||||
start_time=stop_watch.start_time,
|
||||
duration=stop_watch.duration,
|
||||
info=info,
|
||||
),
|
||||
info,
|
||||
)
|
||||
|
||||
print(f"Start [{job_name}], workflow [{workflow.name}]")
|
||||
results = []
|
||||
files = []
|
||||
info_lines = []
|
||||
job_status = Result.Status.SUCCESS
|
||||
|
||||
workflow_config = RunConfig(
|
||||
name=workflow.name,
|
||||
digest_jobs={},
|
||||
digest_dockers={},
|
||||
sha=_Environment.get().SHA,
|
||||
cache_success=[],
|
||||
cache_success_base64=[],
|
||||
cache_artifacts={},
|
||||
).dump()
|
||||
|
||||
# checks:
|
||||
result_, info = _check_yaml_up_to_date()
|
||||
if result_.status != Result.Status.SUCCESS:
|
||||
print("ERROR: yaml files are outdated - regenerate, commit and push")
|
||||
job_status = Result.Status.ERROR
|
||||
info_lines.append(job_name + ": " + info)
|
||||
results.append(result_)
|
||||
|
||||
if workflow.secrets:
|
||||
result_, info = _check_secrets(workflow.secrets)
|
||||
if result_.status != Result.Status.SUCCESS:
|
||||
print(f"ERROR: Invalid secrets in workflow [{workflow.name}]")
|
||||
job_status = Result.Status.ERROR
|
||||
info_lines.append(job_name + ": " + info)
|
||||
results.append(result_)
|
||||
|
||||
if workflow.enable_cidb:
|
||||
result_, info = _check_db(workflow)
|
||||
if result_.status != Result.Status.SUCCESS:
|
||||
job_status = Result.Status.ERROR
|
||||
info_lines.append(job_name + ": " + info)
|
||||
results.append(result_)
|
||||
|
||||
# config:
|
||||
if workflow.dockers:
|
||||
print("Calculate docker's digests")
|
||||
dockers = workflow.dockers
|
||||
dockers = Docker.sort_in_build_order(dockers)
|
||||
for docker in dockers:
|
||||
workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest(
|
||||
docker, dockers
|
||||
)
|
||||
workflow_config.dump()
|
||||
|
||||
if workflow.enable_cache:
|
||||
print("Cache Lookup")
|
||||
stop_watch = Utils.Stopwatch()
|
||||
workflow_config = CacheRunnerHooks.configure(workflow)
|
||||
results.append(
|
||||
Result(
|
||||
name="Cache Lookup",
|
||||
status=Result.Status.SUCCESS,
|
||||
start_time=stop_watch.start_time,
|
||||
duration=stop_watch.duration,
|
||||
)
|
||||
)
|
||||
files.append(RunConfig.file_name_static(workflow.name))
|
||||
|
||||
workflow_config.dump()
|
||||
|
||||
if workflow.enable_report:
|
||||
print("Init report")
|
||||
stop_watch = Utils.Stopwatch()
|
||||
HtmlRunnerHooks.configure(workflow)
|
||||
results.append(
|
||||
Result(
|
||||
name="Init Report",
|
||||
status=Result.Status.SUCCESS,
|
||||
start_time=stop_watch.start_time,
|
||||
duration=stop_watch.duration,
|
||||
)
|
||||
)
|
||||
files.append(Result.file_name_static(workflow.name))
|
||||
|
||||
Result.from_fs(job_name).set_status(job_status).set_results(results).set_files(
|
||||
files
|
||||
).set_info("\n".join(info_lines))
|
||||
|
||||
if job_status != Result.Status.SUCCESS:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _finish_workflow(workflow, job_name):
|
||||
print(f"Start [{job_name}], workflow [{workflow.name}]")
|
||||
env = _Environment.get()
|
||||
|
||||
print("Check Actions statuses")
|
||||
print(env.get_needs_statuses())
|
||||
|
||||
print("Check Workflow results")
|
||||
S3.copy_result_from_s3(
|
||||
Result.file_name_static(workflow.name),
|
||||
lock=False,
|
||||
)
|
||||
workflow_result = Result.from_fs(workflow.name)
|
||||
|
||||
ready_for_merge_status = Result.Status.SUCCESS
|
||||
ready_for_merge_description = ""
|
||||
failed_results = []
|
||||
update_final_report = False
|
||||
for result in workflow_result.results:
|
||||
if result.name == job_name or result.status in (
|
||||
Result.Status.SUCCESS,
|
||||
Result.Status.SKIPPED,
|
||||
):
|
||||
continue
|
||||
if not result.is_completed():
|
||||
print(
|
||||
f"ERROR: not finished job [{result.name}] in the workflow - set status to error"
|
||||
)
|
||||
result.status = Result.Status.ERROR
|
||||
# dump workflow result after update - to have an updated result in post
|
||||
workflow_result.dump()
|
||||
# add error into env - should appear in the report
|
||||
env.add_info(ResultInfo.NOT_FINALIZED + f" [{result.name}]")
|
||||
update_final_report = True
|
||||
job = workflow.get_job(result.name)
|
||||
if not job or not job.allow_merge_on_failure:
|
||||
print(
|
||||
f"NOTE: Result for [{result.name}] has not ok status [{result.status}]"
|
||||
)
|
||||
ready_for_merge_status = Result.Status.FAILED
|
||||
failed_results.append(result.name.split("(", maxsplit=1)[0]) # cut name
|
||||
|
||||
if failed_results:
|
||||
ready_for_merge_description = f"failed: {', '.join(failed_results)}"
|
||||
|
||||
if not GH.post_commit_status(
|
||||
name=Settings.READY_FOR_MERGE_STATUS_NAME + f" [{workflow.name}]",
|
||||
status=ready_for_merge_status,
|
||||
description=ready_for_merge_description,
|
||||
url="",
|
||||
):
|
||||
print(f"ERROR: failed to set status [{Settings.READY_FOR_MERGE_STATUS_NAME}]")
|
||||
env.add_info(ResultInfo.GH_STATUS_ERROR)
|
||||
|
||||
if update_final_report:
|
||||
S3.copy_result_to_s3(
|
||||
workflow_result,
|
||||
unlock=False,
|
||||
) # no lock - no unlock
|
||||
|
||||
Result.from_fs(job_name).set_status(Result.Status.SUCCESS).set_info(
|
||||
ready_for_merge_description
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
job_name = sys.argv[1]
|
||||
assert job_name, "Job name must be provided as input argument"
|
||||
workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0]
|
||||
if job_name == Settings.DOCKER_BUILD_JOB_NAME:
|
||||
_build_dockers(workflow, job_name)
|
||||
elif job_name == Settings.CI_CONFIG_JOB_NAME:
|
||||
_config_workflow(workflow, job_name)
|
||||
elif job_name == Settings.FINISH_WORKFLOW_JOB_NAME:
|
||||
_finish_workflow(workflow, job_name)
|
||||
else:
|
||||
assert False, f"BUG, job name [{job_name}]"
|
258
praktika/parser.py
Normal file
@ -0,0 +1,258 @@
|
||||
import dataclasses
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from praktika import Artifact, Workflow
|
||||
from praktika.mangle import _get_workflows
|
||||
|
||||
|
||||
class AddonType:
|
||||
PY = "py"
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class WorkflowYaml:
|
||||
@dataclasses.dataclass
|
||||
class JobYaml:
|
||||
name: str
|
||||
needs: List[str]
|
||||
runs_on: List[str]
|
||||
artifacts_gh_requires: List["WorkflowYaml.ArtifactYaml"]
|
||||
artifacts_gh_provides: List["WorkflowYaml.ArtifactYaml"]
|
||||
addons: List["WorkflowYaml.JobAddonYaml"]
|
||||
gh_app_auth: bool
|
||||
run_unless_cancelled: bool
|
||||
parameter: Any
|
||||
|
||||
def __repr__(self):
|
||||
return self.name
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ArtifactYaml:
|
||||
name: str
|
||||
provided_by: str
|
||||
required_by: List[str]
|
||||
path: str
|
||||
type: str
|
||||
|
||||
def __repr__(self):
|
||||
return self.name
|
||||
|
||||
@dataclasses.dataclass
|
||||
class JobAddonYaml:
|
||||
install_python: bool
|
||||
requirements_txt_path: str
|
||||
|
||||
name: str
|
||||
event: str
|
||||
branches: List[str]
|
||||
jobs: List[JobYaml]
|
||||
job_to_config: Dict[str, JobYaml]
|
||||
artifact_to_config: Dict[str, ArtifactYaml]
|
||||
secret_names_gh: List[str]
|
||||
enable_cache: bool
|
||||
|
||||
|
||||
class WorkflowConfigParser:
|
||||
def __init__(self, config: Workflow.Config):
|
||||
self.workflow_name = config.name
|
||||
self.config = config
|
||||
self.requires_all = [] # type: List[str]
|
||||
self.provides_all = [] # type: List[str]
|
||||
self.job_names_all = [] # type: List[str]
|
||||
self.artifact_to_providing_job_map = {} # type: Dict[str, List[str]]
|
||||
self.artifact_to_job_requires_map = {} # type: Dict[str, List[str]]
|
||||
self.artifact_map = {} # type: Dict[str, List[Artifact.Config]]
|
||||
|
||||
self.job_to_provides_artifacts = {} # type: Dict[str, List[Artifact.Config]]
|
||||
self.job_to_requires_artifacts = {} # type: Dict[str, List[Artifact.Config]]
|
||||
|
||||
self.workflow_yaml_config = WorkflowYaml(
|
||||
name=self.workflow_name,
|
||||
event=config.event,
|
||||
branches=[],
|
||||
jobs=[],
|
||||
secret_names_gh=[],
|
||||
job_to_config={},
|
||||
artifact_to_config={},
|
||||
enable_cache=False,
|
||||
)
|
||||
|
||||
def parse(self):
|
||||
self.workflow_yaml_config.enable_cache = self.config.enable_cache
|
||||
|
||||
# populate WorkflowYaml.branches
|
||||
if self.config.event in (Workflow.Event.PUSH,):
|
||||
assert (
|
||||
self.config.branches
|
||||
), f'Workflow.Config.branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
|
||||
assert (
|
||||
not self.config.base_branches
|
||||
), f'Workflow.Config.base_branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
|
||||
assert isinstance(
|
||||
self.config.branches, list
|
||||
), f'Workflow.Config.branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]'
|
||||
self.workflow_yaml_config.branches = self.config.branches
|
||||
elif self.config.event in (Workflow.Event.PULL_REQUEST,):
|
||||
assert (
|
||||
self.config.base_branches
|
||||
), f'Workflow.Config.base_branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
|
||||
assert (
|
||||
not self.config.branches
|
||||
), f'Workflow.Config.branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]'
|
||||
assert isinstance(
|
||||
self.config.base_branches, list
|
||||
), f'Workflow.Config.base_branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]'
|
||||
self.workflow_yaml_config.branches = self.config.base_branches
|
||||
|
||||
# populate WorkflowYaml.artifact_to_config with phony artifacts
|
||||
for job in self.config.jobs:
|
||||
assert (
|
||||
job.name not in self.workflow_yaml_config.artifact_to_config
|
||||
), f"Not uniq Job name [{job.name}], workflow [{self.workflow_name}]"
|
||||
self.workflow_yaml_config.artifact_to_config[job.name] = (
|
||||
WorkflowYaml.ArtifactYaml(
|
||||
name=job.name,
|
||||
provided_by=job.name,
|
||||
required_by=[],
|
||||
path="",
|
||||
type=Artifact.Type.PHONY,
|
||||
)
|
||||
)
|
||||
|
||||
# populate jobs
|
||||
for job in self.config.jobs:
|
||||
job_yaml_config = WorkflowYaml.JobYaml(
|
||||
name=job.name,
|
||||
addons=[],
|
||||
artifacts_gh_requires=[],
|
||||
artifacts_gh_provides=[],
|
||||
needs=[],
|
||||
runs_on=[],
|
||||
gh_app_auth=False,
|
||||
run_unless_cancelled=job.run_unless_cancelled,
|
||||
parameter=None,
|
||||
)
|
||||
self.workflow_yaml_config.jobs.append(job_yaml_config)
|
||||
assert (
|
||||
job.name not in self.workflow_yaml_config.job_to_config
|
||||
), f"Job name [{job.name}] is not uniq, workflow [{self.workflow_name}]"
|
||||
self.workflow_yaml_config.job_to_config[job.name] = job_yaml_config
|
||||
|
||||
# populate WorkflowYaml.artifact_to_config
|
||||
if self.config.artifacts:
|
||||
for artifact in self.config.artifacts:
|
||||
assert (
|
||||
artifact.name not in self.workflow_yaml_config.artifact_to_config
|
||||
), f"Artifact name [{artifact.name}] is not uniq, workflow [{self.workflow_name}]"
|
||||
artifact_yaml_config = WorkflowYaml.ArtifactYaml(
|
||||
name=artifact.name,
|
||||
provided_by="",
|
||||
required_by=[],
|
||||
path=artifact.path,
|
||||
type=artifact.type,
|
||||
)
|
||||
self.workflow_yaml_config.artifact_to_config[artifact.name] = (
|
||||
artifact_yaml_config
|
||||
)
|
||||
|
||||
# populate ArtifactYaml.provided_by
|
||||
for job in self.config.jobs:
|
||||
if job.provides:
|
||||
for artifact_name in job.provides:
|
||||
assert (
|
||||
artifact_name in self.workflow_yaml_config.artifact_to_config
|
||||
), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]"
|
||||
assert not self.workflow_yaml_config.artifact_to_config[
|
||||
artifact_name
|
||||
].provided_by, f"Artifact [{artifact_name}] provided by multiple jobs [{self.workflow_yaml_config.artifact_to_config[artifact_name].provided_by}] and [{job.name}]"
|
||||
self.workflow_yaml_config.artifact_to_config[
|
||||
artifact_name
|
||||
].provided_by = job.name
|
||||
|
||||
# populate ArtifactYaml.required_by
|
||||
for job in self.config.jobs:
|
||||
if job.requires:
|
||||
for artifact_name in job.requires:
|
||||
assert (
|
||||
artifact_name in self.workflow_yaml_config.artifact_to_config
|
||||
), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]"
|
||||
assert self.workflow_yaml_config.artifact_to_config[
|
||||
artifact_name
|
||||
].provided_by, f"Artifact [{artifact_name}] has no job providing it, required by job [{job.name}], workflow [{self.workflow_name}]"
|
||||
self.workflow_yaml_config.artifact_to_config[
|
||||
artifact_name
|
||||
].required_by.append(job.name)
|
||||
|
||||
# populate JobYaml.addons
|
||||
for job in self.config.jobs:
|
||||
if job.job_requirements:
|
||||
addon_yaml = WorkflowYaml.JobAddonYaml(
|
||||
requirements_txt_path=job.job_requirements.python_requirements_txt,
|
||||
install_python=job.job_requirements.python,
|
||||
)
|
||||
self.workflow_yaml_config.job_to_config[job.name].addons.append(
|
||||
addon_yaml
|
||||
)
|
||||
|
||||
if self.config.enable_report:
|
||||
for job in self.config.jobs:
|
||||
# auth is required for every job when the HTML report is enabled, so that the workflow summary status can be updated
|
||||
self.workflow_yaml_config.job_to_config[job.name].gh_app_auth = True
|
||||
|
||||
# populate JobYaml.runs_on
|
||||
for job in self.config.jobs:
|
||||
self.workflow_yaml_config.job_to_config[job.name].runs_on = job.runs_on
|
||||
|
||||
# populate JobYaml.artifacts_gh_requires, JobYaml.artifacts_gh_provides and JobYaml.needs
|
||||
for (
|
||||
artifact_name,
|
||||
artifact,
|
||||
) in self.workflow_yaml_config.artifact_to_config.items():
|
||||
# assert (
|
||||
# artifact.provided_by
|
||||
# and artifact.provided_by in self.workflow_yaml_config.job_to_config
|
||||
# ), f"Artifact [{artifact_name}] has no valid job providing it [{artifact.provided_by}]"
|
||||
for job_name in artifact.required_by:
|
||||
if (
|
||||
artifact.provided_by
|
||||
not in self.workflow_yaml_config.job_to_config[job_name].needs
|
||||
):
|
||||
self.workflow_yaml_config.job_to_config[job_name].needs.append(
|
||||
artifact.provided_by
|
||||
)
|
||||
if artifact.type in (Artifact.Type.GH,):
|
||||
self.workflow_yaml_config.job_to_config[
|
||||
job_name
|
||||
].artifacts_gh_requires.append(artifact)
|
||||
elif artifact.type in (Artifact.Type.PHONY, Artifact.Type.S3):
|
||||
pass
|
||||
else:
|
||||
assert (
|
||||
False
|
||||
), f"Artifact [{artifact_name}] has unsupported type [{artifact.type}]"
|
||||
if not artifact.required_by and artifact.type != Artifact.Type.PHONY:
|
||||
print(
|
||||
f"WARNING: Artifact [{artifact_name}] provided by job [{artifact.provided_by}] not required by any job in workflow [{self.workflow_name}]"
|
||||
)
|
||||
if artifact.type == Artifact.Type.GH:
|
||||
self.workflow_yaml_config.job_to_config[
|
||||
artifact.provided_by
|
||||
].artifacts_gh_provides.append(artifact)
|
||||
|
||||
# populate JobYaml.parametrize
|
||||
for job in self.config.jobs:
|
||||
self.workflow_yaml_config.job_to_config[job.name].parameter = job.parameter
|
||||
|
||||
# populate secrets
|
||||
for secret_config in self.config.secrets:
|
||||
if secret_config.is_gh():
|
||||
self.workflow_yaml_config.secret_names_gh.append(secret_config.name)
|
||||
|
||||
return self
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# test
|
||||
workflows = _get_workflows()
|
||||
for workflow in workflows:
|
||||
WorkflowConfigParser(workflow).parse()
|
354
praktika/result.py
Normal file
@ -0,0 +1,354 @@
|
||||
import dataclasses
|
||||
import datetime
|
||||
import sys
|
||||
from collections.abc import Container
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from praktika._environment import _Environment
|
||||
from praktika._settings import _Settings
|
||||
from praktika.utils import ContextManager, MetaClasses, Shell, Utils
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Result(MetaClasses.Serializable):
|
||||
"""
|
||||
Represents the outcome of a workflow/job/task or any operation, along with associated metadata.
|
||||
|
||||
This class supports nesting of results to represent tasks with sub-tasks, and includes
|
||||
various attributes to track status, timing, files, and links.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the task.
|
||||
status (str): The current status of the task. Should be one of the values defined in the Status class.
|
||||
start_time (Optional[float]): The start time of the task in Unix timestamp format. None if not started.
|
||||
duration (Optional[float]): The duration of the task in seconds. None if not completed.
|
||||
results (List[Result]): A list of sub-results representing nested tasks.
|
||||
files (List[str]): A list of file paths or names related to the result.
|
||||
links (List[str]): A list of URLs related to the result (e.g., links to reports or resources).
|
||||
info (str): Additional information about the result. Free-form text.
|
||||
# TODO: rename
|
||||
aux_links (List[str]): A list of auxiliary links that provide additional context for the result.
|
||||
# TODO: remove
|
||||
html_link (str): A direct link to an HTML representation of the result (e.g., a detailed report page).
|
||||
|
||||
Inner Class:
|
||||
Status: Defines possible statuses for the task, such as "success", "failure", etc.
|
||||
"""
|
||||
|
||||
class Status:
|
||||
SKIPPED = "skipped"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failure"
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
ERROR = "error"
|
||||
|
||||
name: str
|
||||
status: str
|
||||
start_time: Optional[float] = None
|
||||
duration: Optional[float] = None
|
||||
results: List["Result"] = dataclasses.field(default_factory=list)
|
||||
files: List[str] = dataclasses.field(default_factory=list)
|
||||
links: List[str] = dataclasses.field(default_factory=list)
|
||||
info: str = ""
|
||||
aux_links: List[str] = dataclasses.field(default_factory=list)
|
||||
html_link: str = ""
|
||||
|
||||
@staticmethod
|
||||
def create_from(
|
||||
name="",
|
||||
results: List["Result"] = None,
|
||||
stopwatch: Utils.Stopwatch = None,
|
||||
status="",
|
||||
files=None,
|
||||
info="",
|
||||
with_info_from_results=True,
|
||||
):
|
||||
if isinstance(status, bool):
|
||||
status = Result.Status.SUCCESS if status else Result.Status.FAILED
|
||||
if not results and not status:
|
||||
print("ERROR: Either .results or .status must be provided")
|
||||
raise RuntimeError("either .results or .status must be provided")
|
||||
if not name:
|
||||
name = _Environment.get().JOB_NAME
|
||||
if not name:
|
||||
print("ERROR: Failed to guess the .name")
|
||||
raise RuntimeError("failed to guess the .name")
|
||||
result_status = status or Result.Status.SUCCESS
|
||||
infos = []
|
||||
if info:
|
||||
if isinstance(info, Container) and not isinstance(info, str):
|
||||
infos += info
|
||||
else:
|
||||
infos.append(info)
|
||||
if results and not status:
|
||||
for result in results:
|
||||
if result.status not in (Result.Status.SUCCESS, Result.Status.FAILED):
|
||||
Utils.raise_with_error(
|
||||
f"Unexpected result status [{result.status}] for Result.create_from call"
|
||||
)
|
||||
if result.status != Result.Status.SUCCESS:
|
||||
result_status = Result.Status.FAILED
|
||||
if results:
|
||||
for result in results:
|
||||
if result.info and with_info_from_results:
|
||||
infos.append(f"{result.name}: {result.info}")
|
||||
return Result(
|
||||
name=name,
|
||||
status=result_status,
|
||||
start_time=stopwatch.start_time if stopwatch else None,
|
||||
duration=stopwatch.duration if stopwatch else None,
|
||||
info="\n".join(infos) if infos else "",
|
||||
results=results or [],
|
||||
files=files or [],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get():
|
||||
return Result.from_fs(_Environment.get().JOB_NAME)
|
||||
|
||||
def is_completed(self):
|
||||
return self.status not in (Result.Status.PENDING, Result.Status.RUNNING)
|
||||
|
||||
def is_running(self):
|
||||
return self.status in (Result.Status.RUNNING,)
|
||||
|
||||
def is_ok(self):
|
||||
return self.status in (Result.Status.SKIPPED, Result.Status.SUCCESS)
|
||||
|
||||
def set_status(self, status) -> "Result":
|
||||
self.status = status
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
def set_success(self) -> "Result":
|
||||
return self.set_status(Result.Status.SUCCESS)
|
||||
|
||||
def set_results(self, results: List["Result"]) -> "Result":
|
||||
self.results = results
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
def set_files(self, files) -> "Result":
|
||||
for file in files:
|
||||
assert Path(
|
||||
file
|
||||
).is_file(), f"Not valid file [{file}] from file list [{files}]"
|
||||
if not self.files:
|
||||
self.files = []
|
||||
self.files += files
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
def set_info(self, info: str) -> "Result":
|
||||
if self.info:
|
||||
self.info += "\n"
|
||||
self.info += info
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
def set_link(self, link) -> "Result":
|
||||
self.links.append(link)
|
||||
self.dump()
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def file_name_static(cls, name):
|
||||
return f"{_Settings.TEMP_DIR}/result_{Utils.normalize_string(name)}.json"
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, obj: Dict[str, Any]) -> "Result":
|
||||
sub_results = []
|
||||
for result_dict in obj["results"] or []:
|
||||
sub_res = cls.from_dict(result_dict)
|
||||
sub_results.append(sub_res)
|
||||
obj["results"] = sub_results
|
||||
return Result(**obj)
|
||||
|
||||
def update_duration(self):
|
||||
if not self.duration and self.start_time:
|
||||
self.duration = datetime.datetime.utcnow().timestamp() - self.start_time
|
||||
else:
|
||||
if not self.duration:
|
||||
print(
|
||||
f"NOTE: duration is set for job [{self.name}] Result - do not update by CI"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"NOTE: start_time is not set for job [{self.name}] Result - do not update duration"
|
||||
)
|
||||
return self
|
||||
|
||||
def update_sub_result(self, result: "Result"):
|
||||
assert self.results, "BUG?"
|
||||
for i, result_ in enumerate(self.results):
|
||||
if result_.name == result.name:
|
||||
self.results[i] = result
|
||||
self._update_status()
|
||||
return self
|
||||
|
||||
def _update_status(self):
|
||||
was_pending = False
|
||||
was_running = False
|
||||
if self.status == self.Status.PENDING:
|
||||
was_pending = True
|
||||
if self.status == self.Status.RUNNING:
|
||||
was_running = True
|
||||
|
||||
has_pending, has_running, has_failed = False, False, False
|
||||
for result_ in self.results:
|
||||
if result_.status in (self.Status.RUNNING,):
|
||||
has_running = True
|
||||
if result_.status in (self.Status.PENDING,):
|
||||
has_pending = True
|
||||
if result_.status in (self.Status.ERROR, self.Status.FAILED):
|
||||
has_failed = True
|
||||
if has_running:
|
||||
self.status = self.Status.RUNNING
|
||||
elif has_pending:
|
||||
self.status = self.Status.PENDING
|
||||
elif has_failed:
|
||||
self.status = self.Status.FAILED
|
||||
else:
|
||||
self.status = self.Status.SUCCESS
|
||||
if (was_pending or was_running) and self.status not in (
|
||||
self.Status.PENDING,
|
||||
self.Status.RUNNING,
|
||||
):
|
||||
print("Pipeline finished")
|
||||
self.update_duration()
|
||||
|
||||
@classmethod
|
||||
def generate_pending(cls, name, results=None):
|
||||
return Result(
|
||||
name=name,
|
||||
status=Result.Status.PENDING,
|
||||
start_time=None,
|
||||
duration=None,
|
||||
results=results or [],
|
||||
files=[],
|
||||
links=[],
|
||||
info="",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def generate_skipped(cls, name, results=None):
|
||||
return Result(
|
||||
name=name,
|
||||
status=Result.Status.SKIPPED,
|
||||
start_time=None,
|
||||
duration=None,
|
||||
results=results or [],
|
||||
files=[],
|
||||
links=[],
|
||||
info="from cache",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def create_from_command_execution(
|
||||
cls,
|
||||
name,
|
||||
command,
|
||||
with_log=False,
|
||||
fail_fast=True,
|
||||
workdir=None,
|
||||
command_args=None,
|
||||
command_kwargs=None,
|
||||
):
|
||||
"""
|
||||
Executes shell commands or Python callables, optionally logging output, and handles errors.
|
||||
|
||||
:param name: Check name
|
||||
:param command: Shell command (str) or Python callable, or list of them.
|
||||
:param workdir: Optional working directory.
|
||||
:param with_log: Boolean flag to log output to a file.
|
||||
:param fail_fast: Boolean flag to stop execution if one command fails.
|
||||
:param command_args: Positional arguments for the callable command.
|
||||
:param command_kwargs: Keyword arguments for the callable command.
|
||||
:return: Result object with status and optional log file.
|
||||
"""
|
||||
|
||||
# Stopwatch to track execution time
|
||||
stop_watch_ = Utils.Stopwatch()
|
||||
command_args = command_args or []
|
||||
command_kwargs = command_kwargs or {}
|
||||
|
||||
# Set log file path if logging is enabled
|
||||
log_file = (
|
||||
f"{_Settings.TEMP_DIR}/{Utils.normalize_string(name)}.log"
|
||||
if with_log
|
||||
else None
|
||||
)
|
||||
|
||||
# Ensure the command is a list for consistent iteration
|
||||
if not isinstance(command, list):
|
||||
fail_fast = False
|
||||
command = [command]
|
||||
|
||||
print(f"> Starting execution for [{name}]")
|
||||
res = True # Track success/failure status
|
||||
error_infos = []
|
||||
for command_ in command:
|
||||
if callable(command_):
|
||||
# If command is a Python function, call it with provided arguments
|
||||
result = command_(*command_args, **command_kwargs)
|
||||
if isinstance(result, bool):
|
||||
res = result
|
||||
elif result:
|
||||
error_infos.append(str(result))
|
||||
res = False
|
||||
else:
|
||||
# Run shell command in a specified directory with logging and verbosity
|
||||
with ContextManager.cd(workdir):
|
||||
exit_code = Shell.run(command_, verbose=True, log_file=log_file)
|
||||
res = exit_code == 0
|
||||
|
||||
# If fail_fast is enabled, stop on first failure
|
||||
if not res and fail_fast:
|
||||
print(f"Execution stopped due to failure in [{command_}]")
|
||||
break
|
||||
|
||||
# Create and return the result object with status and log file (if any)
|
||||
return Result.create_from(
|
||||
name=name,
|
||||
status=res,
|
||||
stopwatch=stop_watch_,
|
||||
info=error_infos,
|
||||
files=[log_file] if log_file else None,
|
||||
)
|
||||
|
||||
def finish_job_accordingly(self):
|
||||
self.dump()
|
||||
if not self.is_ok():
|
||||
print("ERROR: Job Failed")
|
||||
for result in self.results:
|
||||
if not result.is_ok():
|
||||
print("Failed checks:")
|
||||
print(" | ", result)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("ok")
|
||||
|
||||
|
||||
class ResultInfo:
|
||||
SETUP_ENV_JOB_FAILED = (
|
||||
"Failed to set up job env, it's praktika bug or misconfiguration"
|
||||
)
|
||||
PRE_JOB_FAILED = (
|
||||
"Failed to do a job pre-run step, it's praktika bug or misconfiguration"
|
||||
)
|
||||
KILLED = "Job killed or terminated, no Result provided"
|
||||
NOT_FOUND_IMPOSSIBLE = (
|
||||
"No Result file (bug, or job misbehaviour, must not ever happen)"
|
||||
)
|
||||
SKIPPED_DUE_TO_PREVIOUS_FAILURE = "Skipped due to previous failure"
|
||||
TIMEOUT = "Timeout"
|
||||
|
||||
GH_STATUS_ERROR = "Failed to set GH commit status"
|
||||
|
||||
NOT_FINALIZED = (
|
||||
"Job did not not provide Result: job script bug, died CI runner or praktika bug"
|
||||
)
|
||||
|
||||
S3_ERROR = "S3 call failure"
|
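A minimal usage sketch (illustrative only, not part of the diff above) of the Result API from praktika/result.py; the check name and shell commands are placeholders, and Settings.TEMP_DIR is assumed to exist:

# hypothetical job script built on the Result helpers above
from praktika.result import Result


def check_build():
    # run two shell commands as one named check, teeing their output into a log file
    return Result.create_from_command_execution(
        name="Build check",
        command=["cmake --version", "echo build ok"],
        with_log=True,
        fail_fast=True,
    )


if __name__ == "__main__":
    # dumps the Result JSON and exits with code 1 if any command failed
    check_build().finish_job_accordingly()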
348
praktika/runner.py
Normal file
@ -0,0 +1,348 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
from praktika._environment import _Environment
|
||||
from praktika.artifact import Artifact
|
||||
from praktika.cidb import CIDB
|
||||
from praktika.digest import Digest
|
||||
from praktika.hook_cache import CacheRunnerHooks
|
||||
from praktika.hook_html import HtmlRunnerHooks
|
||||
from praktika.result import Result, ResultInfo
|
||||
from praktika.runtime import RunConfig
|
||||
from praktika.s3 import S3
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Shell, TeePopen, Utils
|
||||
|
||||
|
||||
class Runner:
|
||||
@staticmethod
|
||||
def generate_dummy_environment(workflow, job):
|
||||
print("WARNING: Generate dummy env for local test")
|
||||
Shell.check(
|
||||
f"mkdir -p {Settings.TEMP_DIR} {Settings.INPUT_DIR} {Settings.OUTPUT_DIR}"
|
||||
)
|
||||
_Environment(
|
||||
WORKFLOW_NAME=workflow.name,
|
||||
JOB_NAME=job.name,
|
||||
REPOSITORY="",
|
||||
BRANCH="",
|
||||
SHA="",
|
||||
PR_NUMBER=-1,
|
||||
EVENT_TYPE="",
|
||||
JOB_OUTPUT_STREAM="",
|
||||
EVENT_FILE_PATH="",
|
||||
CHANGE_URL="",
|
||||
COMMIT_URL="",
|
||||
BASE_BRANCH="",
|
||||
RUN_URL="",
|
||||
RUN_ID="",
|
||||
INSTANCE_ID="",
|
||||
INSTANCE_TYPE="",
|
||||
INSTANCE_LIFE_CYCLE="",
|
||||
).dump()
|
||||
workflow_config = RunConfig(
|
||||
name=workflow.name,
|
||||
digest_jobs={},
|
||||
digest_dockers={},
|
||||
sha="",
|
||||
cache_success=[],
|
||||
cache_success_base64=[],
|
||||
cache_artifacts={},
|
||||
)
|
||||
for docker in workflow.dockers:
|
||||
workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest(
|
||||
docker, workflow.dockers
|
||||
)
|
||||
workflow_config.dump()
|
||||
|
||||
Result.generate_pending(job.name).dump()
|
||||
|
||||
def _setup_env(self, _workflow, job):
|
||||
# source env file to write data into fs (workflow config json, workflow status json)
|
||||
Shell.check(f". {Settings.ENV_SETUP_SCRIPT}", verbose=True, strict=True)
|
||||
|
||||
# parse the same env script and apply envs from python so that this process sees them
|
||||
with open(Settings.ENV_SETUP_SCRIPT, "r") as f:
|
||||
content = f.read()
|
||||
export_pattern = re.compile(
|
||||
r"export (\w+)=\$\(cat<<\'EOF\'\n(.*?)EOF\n\)", re.DOTALL
|
||||
)
|
||||
matches = export_pattern.findall(content)
|
||||
for key, value in matches:
|
||||
value = value.strip()
|
||||
os.environ[key] = value
|
||||
print(f"Set environment variable {key}.")
|
||||
|
||||
# TODO: remove
|
||||
os.environ["PYTHONPATH"] = os.getcwd()
|
||||
|
||||
print("Read GH Environment")
|
||||
env = _Environment.from_env()
|
||||
env.JOB_NAME = job.name
|
||||
env.PARAMETER = job.parameter
|
||||
env.dump()
|
||||
print(env)
|
||||
|
||||
return 0
|
||||
|
||||
def _pre_run(self, workflow, job):
|
||||
env = _Environment.get()
|
||||
|
||||
result = Result(
|
||||
name=job.name,
|
||||
status=Result.Status.RUNNING,
|
||||
start_time=Utils.timestamp(),
|
||||
)
|
||||
result.dump()
|
||||
|
||||
if workflow.enable_report and job.name != Settings.CI_CONFIG_JOB_NAME:
|
||||
print("Update Job and Workflow Report")
|
||||
HtmlRunnerHooks.pre_run(workflow, job)
|
||||
|
||||
print("Download required artifacts")
|
||||
required_artifacts = []
|
||||
if job.requires and workflow.artifacts:
|
||||
for requires_artifact_name in job.requires:
|
||||
for artifact in workflow.artifacts:
|
||||
if (
|
||||
artifact.name == requires_artifact_name
|
||||
and artifact.type == Artifact.Type.S3
|
||||
):
|
||||
required_artifacts.append(artifact)
|
||||
print(f"--- Job requires s3 artifacts [{required_artifacts}]")
|
||||
if workflow.enable_cache:
|
||||
prefixes = CacheRunnerHooks.pre_run(
|
||||
_job=job, _workflow=workflow, _required_artifacts=required_artifacts
|
||||
)
|
||||
else:
|
||||
prefixes = [env.get_s3_prefix()] * len(required_artifacts)
|
||||
for artifact, prefix in zip(required_artifacts, prefixes):
|
||||
s3_path = f"{Settings.S3_ARTIFACT_PATH}/{prefix}/{Utils.normalize_string(artifact._provided_by)}/{Path(artifact.path).name}"
|
||||
assert S3.copy_file_from_s3(s3_path=s3_path, local_path=Settings.INPUT_DIR)
|
||||
|
||||
return 0
|
||||
|
||||
def _run(self, workflow, job, docker="", no_docker=False, param=None):
|
||||
if param:
|
||||
if not isinstance(param, str):
|
||||
Utils.raise_with_error(
|
||||
f"Custom param for local tests must be of type str, got [{type(param)}]"
|
||||
)
|
||||
env = _Environment.get()
|
||||
env.LOCAL_RUN_PARAM = param
|
||||
env.dump()
|
||||
print(f"Custom param for local tests [{param}] dumped into Environment")
|
||||
|
||||
if job.run_in_docker and not no_docker:
|
||||
# TODO: add support for any image, including not from ci config (e.g. ubuntu:latest)
|
||||
docker_tag = RunConfig.from_fs(workflow.name).digest_dockers[
|
||||
job.run_in_docker
|
||||
]
|
||||
docker = docker or f"{job.run_in_docker}:{docker_tag}"
|
||||
cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}"
|
||||
else:
|
||||
cmd = job.command
|
||||
print(f"--- Run command [{cmd}]")
|
||||
|
||||
with TeePopen(cmd, timeout=job.timeout) as process:
|
||||
exit_code = process.wait()
|
||||
|
||||
result = Result.from_fs(job.name)
|
||||
if exit_code != 0:
|
||||
if not result.is_completed():
|
||||
if process.timeout_exceeded:
|
||||
print(
|
||||
f"WARNING: Job timed out: [{job.name}], timeout [{job.timeout}], exit code [{exit_code}]"
|
||||
)
|
||||
result.set_status(Result.Status.ERROR).set_info(
|
||||
ResultInfo.TIMEOUT
|
||||
)
|
||||
elif result.is_running():
|
||||
info = f"ERROR: Job terminated with an error, exit code [{exit_code}] - set status to [{Result.Status.ERROR}]"
|
||||
print(info)
|
||||
result.set_status(Result.Status.ERROR).set_info(info)
|
||||
else:
|
||||
info = f"ERROR: Invalid status [{result.status}] for exit code [{exit_code}] - switch to [{Result.Status.ERROR}]"
|
||||
print(info)
|
||||
result.set_status(Result.Status.ERROR).set_info(info)
|
||||
result.dump()
|
||||
|
||||
return exit_code
|
||||
|
||||
def _post_run(
|
||||
self, workflow, job, setup_env_exit_code, prerun_exit_code, run_exit_code
|
||||
):
|
||||
info_errors = []
|
||||
env = _Environment.get()
|
||||
result_exist = Result.exist(job.name)
|
||||
|
||||
if setup_env_exit_code != 0:
|
||||
info = f"ERROR: {ResultInfo.SETUP_ENV_JOB_FAILED}"
|
||||
print(info)
|
||||
# set Result with error and logs
|
||||
Result(
|
||||
name=job.name,
|
||||
status=Result.Status.ERROR,
|
||||
start_time=Utils.timestamp(),
|
||||
duration=0.0,
|
||||
info=info,
|
||||
).dump()
|
||||
elif prerun_exit_code != 0:
|
||||
info = f"ERROR: {ResultInfo.PRE_JOB_FAILED}"
|
||||
print(info)
|
||||
# set Result with error and logs
|
||||
Result(
|
||||
name=job.name,
|
||||
status=Result.Status.ERROR,
|
||||
start_time=Utils.timestamp(),
|
||||
duration=0.0,
|
||||
info=info,
|
||||
).dump()
|
||||
elif not result_exist:
|
||||
info = f"ERROR: {ResultInfo.NOT_FOUND_IMPOSSIBLE}"
|
||||
print(info)
|
||||
Result(
|
||||
name=job.name,
|
||||
start_time=Utils.timestamp(),
|
||||
duration=None,
|
||||
status=Result.Status.ERROR,
|
||||
info=ResultInfo.NOT_FOUND_IMPOSSIBLE,
|
||||
).dump()
|
||||
|
||||
result = Result.from_fs(job.name)
|
||||
|
||||
if not result.is_completed():
|
||||
info = f"ERROR: {ResultInfo.KILLED}"
|
||||
print(info)
|
||||
result.set_info(info).set_status(Result.Status.ERROR).dump()
|
||||
|
||||
result.set_files(files=[Settings.RUN_LOG])
|
||||
result.update_duration().dump()
|
||||
|
||||
if result.info and result.status != Result.Status.SUCCESS:
|
||||
# provide job info to workflow level
|
||||
info_errors.append(result.info)
|
||||
|
||||
if run_exit_code == 0:
|
||||
providing_artifacts = []
|
||||
if job.provides and workflow.artifacts:
|
||||
for provides_artifact_name in job.provides:
|
||||
for artifact in workflow.artifacts:
|
||||
if (
|
||||
artifact.name == provides_artifact_name
|
||||
and artifact.type == Artifact.Type.S3
|
||||
):
|
||||
providing_artifacts.append(artifact)
|
||||
if providing_artifacts:
|
||||
print(f"Job provides s3 artifacts [{providing_artifacts}]")
|
||||
for artifact in providing_artifacts:
|
||||
try:
|
||||
assert Shell.check(
|
||||
f"ls -l {artifact.path}", verbose=True
|
||||
), f"Artifact {artifact.path} not found"
|
||||
s3_path = f"{Settings.S3_ARTIFACT_PATH}/{env.get_s3_prefix()}/{Utils.normalize_string(env.JOB_NAME)}"
|
||||
link = S3.copy_file_to_s3(
|
||||
s3_path=s3_path, local_path=artifact.path
|
||||
)
|
||||
result.set_link(link)
|
||||
except Exception as e:
|
||||
error = (
|
||||
f"ERROR: Failed to upload artifact [{artifact}], ex [{e}]"
|
||||
)
|
||||
print(error)
|
||||
info_errors.append(error)
|
||||
result.set_status(Result.Status.ERROR)
|
||||
|
||||
if workflow.enable_cidb:
|
||||
print("Insert results to CIDB")
|
||||
try:
|
||||
CIDB(
|
||||
url=workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(),
|
||||
passwd=workflow.get_secret(
|
||||
Settings.SECRET_CI_DB_PASSWORD
|
||||
).get_value(),
|
||||
).insert(result)
|
||||
except Exception as ex:
|
||||
error = f"ERROR: Failed to insert data into CI DB, exception [{ex}]"
|
||||
print(error)
|
||||
info_errors.append(error)
|
||||
|
||||
result.dump()
|
||||
|
||||
# always in the end
|
||||
if workflow.enable_cache:
|
||||
print(f"Run CI cache hook")
|
||||
if result.is_ok():
|
||||
CacheRunnerHooks.post_run(workflow, job)
|
||||
|
||||
if workflow.enable_report:
|
||||
print(f"Run html report hook")
|
||||
HtmlRunnerHooks.post_run(workflow, job, info_errors)
|
||||
|
||||
return True
|
||||
|
||||
def run(
|
||||
self, workflow, job, docker="", dummy_env=False, no_docker=False, param=None
|
||||
):
|
||||
res = True
|
||||
setup_env_code = -10
|
||||
prerun_code = -10
|
||||
run_code = -10
|
||||
|
||||
if res and not dummy_env:
|
||||
print(
|
||||
f"\n\n=== Setup env script [{job.name}], workflow [{workflow.name}] ==="
|
||||
)
|
||||
try:
|
||||
setup_env_code = self._setup_env(workflow, job)
|
||||
# Source the bash script and capture the environment variables
|
||||
res = setup_env_code == 0
|
||||
if not res:
|
||||
print(
|
||||
f"ERROR: Setup env script failed with exit code [{setup_env_code}]"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"ERROR: Setup env script failed with exception [{e}]")
|
||||
traceback.print_exc()
|
||||
print(f"=== Setup env finished ===\n\n")
|
||||
else:
|
||||
self.generate_dummy_environment(workflow, job)
|
||||
|
||||
if res and not dummy_env:
|
||||
res = False
|
||||
print(f"=== Pre run script [{job.name}], workflow [{workflow.name}] ===")
|
||||
try:
|
||||
prerun_code = self._pre_run(workflow, job)
|
||||
res = prerun_code == 0
|
||||
if not res:
|
||||
print(f"ERROR: Pre-run failed with exit code [{prerun_code}]")
|
||||
except Exception as e:
|
||||
print(f"ERROR: Pre-run script failed with exception [{e}]")
|
||||
traceback.print_exc()
|
||||
print(f"=== Pre run finished ===\n\n")
|
||||
|
||||
if res:
|
||||
res = False
|
||||
print(f"=== Run script [{job.name}], workflow [{workflow.name}] ===")
|
||||
try:
|
||||
run_code = self._run(
|
||||
workflow, job, docker=docker, no_docker=no_docker, param=param
|
||||
)
|
||||
res = run_code == 0
|
||||
if not res:
|
||||
print(f"ERROR: Run failed with exit code [{run_code}]")
|
||||
except Exception as e:
|
||||
print(f"ERROR: Run script failed with exception [{e}]")
|
||||
traceback.print_exc()
|
||||
print(f"=== Run scrip finished ===\n\n")
|
||||
|
||||
if not dummy_env:
|
||||
print(f"=== Post run script [{job.name}], workflow [{workflow.name}] ===")
|
||||
self._post_run(workflow, job, setup_env_code, prerun_code, run_code)
|
||||
print(f"=== Post run scrip finished ===")
|
||||
|
||||
if not res:
|
||||
sys.exit(1)
|
35
praktika/runtime.py
Normal file
@ -0,0 +1,35 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List
|
||||
|
||||
from praktika.cache import Cache
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import MetaClasses, Utils
|
||||
|
||||
|
||||
@dataclass
|
||||
class RunConfig(MetaClasses.Serializable):
|
||||
name: str
|
||||
digest_jobs: Dict[str, str]
|
||||
digest_dockers: Dict[str, str]
|
||||
cache_success: List[str]
|
||||
# there might be issues with special characters in job names if used directly in yaml syntax - keep a base64-encoded list to avoid this
|
||||
cache_success_base64: List[str]
|
||||
cache_artifacts: Dict[str, Cache.CacheRecord]
|
||||
sha: str
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, obj):
|
||||
cache_artifacts = obj["cache_artifacts"]
|
||||
cache_artifacts_deserialized = {}
|
||||
for artifact_name, cache_artifact in cache_artifacts.items():
|
||||
cache_artifacts_deserialized[artifact_name] = Cache.CacheRecord.from_dict(
|
||||
cache_artifact
|
||||
)
|
||||
obj["cache_artifacts"] = cache_artifacts_deserialized
|
||||
return RunConfig(**obj)
|
||||
|
||||
@classmethod
|
||||
def file_name_static(cls, name):
|
||||
return (
|
||||
f"{Settings.TEMP_DIR}/workflow_config_{Utils.normalize_string(name)}.json"
|
||||
)
|
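For orientation, a small sketch of how RunConfig round-trips through the Serializable helpers (the workflow name and sha are made up; Settings.TEMP_DIR is assumed to exist):

from praktika.runtime import RunConfig

config = RunConfig(
    name="Example Workflow",
    digest_jobs={},
    digest_dockers={},
    cache_success=[],
    cache_success_base64=[],
    cache_artifacts={},
    sha="deadbeef",
)
# writes workflow_config_<normalized name>.json under Settings.TEMP_DIR
config.dump()
# a later job in the same workflow reads it back by name
assert RunConfig.from_fs("Example Workflow").sha == "deadbeef"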
295
praktika/s3.py
Normal file
@ -0,0 +1,295 @@
|
||||
import dataclasses
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from praktika._environment import _Environment
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import Shell, Utils
|
||||
|
||||
|
||||
class S3:
|
||||
@dataclasses.dataclass
|
||||
class Object:
|
||||
AcceptRanges: str
|
||||
Expiration: str
|
||||
LastModified: str
|
||||
ContentLength: int
|
||||
ETag: str
|
||||
ContentType: str
|
||||
ServerSideEncryption: str
|
||||
Metadata: Dict
|
||||
|
||||
def has_tags(self, tags):
|
||||
meta = self.Metadata
|
||||
for k, v in tags.items():
|
||||
if k not in meta or meta[k] != v:
|
||||
print(f"tag [{k}={v}] does not match meta [{meta}]")
|
||||
return False
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def clean_s3_directory(cls, s3_path):
|
||||
assert len(s3_path.split("/")) > 2, "check to not delete too much"
|
||||
cmd = f"aws s3 rm s3://{s3_path} --recursive"
|
||||
cls.run_command_with_retries(cmd, retries=1)
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def copy_file_to_s3(cls, s3_path, local_path, text=False):
|
||||
assert Path(local_path).exists(), f"Path [{local_path}] does not exist"
|
||||
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
|
||||
assert Path(
|
||||
local_path
|
||||
).is_file(), f"Path [{local_path}] is not file. Only files are supported"
|
||||
file_name = Path(local_path).name
|
||||
s3_full_path = s3_path
|
||||
if not s3_full_path.endswith(file_name):
|
||||
s3_full_path = f"{s3_path}/{Path(local_path).name}"
|
||||
cmd = f"aws s3 cp {local_path} s3://{s3_full_path}"
|
||||
if text:
|
||||
cmd += " --content-type text/plain"
|
||||
res = cls.run_command_with_retries(cmd)
|
||||
if not res:
|
||||
raise RuntimeError(f"failed to upload file [{local_path}] to s3 path [{s3_path}]")
|
||||
bucket = s3_path.split("/")[0]
|
||||
endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket]
|
||||
assert endpoint
|
||||
return f"https://{s3_full_path}".replace(bucket, endpoint)
|
||||
|
||||
@classmethod
|
||||
def put(cls, s3_path, local_path, text=False, metadata=None):
|
||||
assert Path(local_path).exists(), f"Path [{local_path}] does not exist"
|
||||
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
|
||||
assert Path(
|
||||
local_path
|
||||
).is_file(), f"Path [{local_path}] is not file. Only files are supported"
|
||||
file_name = Path(local_path).name
|
||||
s3_full_path = s3_path
|
||||
if not s3_full_path.endswith(file_name):
|
||||
s3_full_path = f"{s3_path}/{Path(local_path).name}"
|
||||
|
||||
s3_full_path = str(s3_full_path).removeprefix("s3://")
|
||||
bucket, key = s3_full_path.split("/", maxsplit=1)
|
||||
|
||||
command = (
|
||||
f"aws s3api put-object --bucket {bucket} --key {key} --body {local_path}"
|
||||
)
|
||||
if metadata:
|
||||
for k, v in metadata.items():
|
||||
command += f" --metadata {k}={v}"
|
||||
|
||||
cmd = f"aws s3 cp {local_path} s3://{s3_full_path}"
|
||||
if text:
|
||||
cmd += " --content-type text/plain"
|
||||
res = cls.run_command_with_retries(command)
|
||||
assert res
|
||||
|
||||
@classmethod
|
||||
def run_command_with_retries(cls, command, retries=Settings.MAX_RETRIES_S3):
|
||||
i = 0
|
||||
res = False
|
||||
while not res and i < retries:
|
||||
i += 1
|
||||
ret_code, stdout, stderr = Shell.get_res_stdout_stderr(
|
||||
command, verbose=True
|
||||
)
|
||||
if "aws sso login" in stderr:
|
||||
print("ERROR: aws login expired")
|
||||
break
|
||||
elif "does not exist" in stderr:
|
||||
print("ERROR: requested file does not exist")
|
||||
break
|
||||
if ret_code != 0:
|
||||
print(
|
||||
f"ERROR: aws s3 cp failed, stdout/stderr err: [{stderr}], out [{stdout}]"
|
||||
)
|
||||
res = ret_code == 0
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def get_link(cls, s3_path, local_path):
|
||||
s3_full_path = f"{s3_path}/{Path(local_path).name}"
|
||||
bucket = s3_path.split("/")[0]
|
||||
endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket]
|
||||
return f"https://{s3_full_path}".replace(bucket, endpoint)
|
||||
|
||||
@classmethod
|
||||
def copy_file_from_s3(cls, s3_path, local_path):
|
||||
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
|
||||
if Path(local_path).is_dir():
|
||||
local_path = Path(local_path) / Path(s3_path).name
|
||||
else:
|
||||
assert Path(
|
||||
local_path
|
||||
).parent.is_dir(), f"Parent path for [{local_path}] does not exist"
|
||||
cmd = f"aws s3 cp s3://{s3_path} {local_path}"
|
||||
res = cls.run_command_with_retries(cmd)
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def head_object(cls, s3_path):
|
||||
s3_path = str(s3_path).removeprefix("s3://")
|
||||
bucket, key = s3_path.split("/", maxsplit=1)
|
||||
output = Shell.get_output(
|
||||
f"aws s3api head-object --bucket {bucket} --key {key}", verbose=True
|
||||
)
|
||||
if not output:
|
||||
return None
|
||||
else:
|
||||
return cls.Object(**json.loads(output))
|
||||
|
||||
@classmethod
|
||||
def delete(cls, s3_path):
|
||||
assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
|
||||
return Shell.check(
|
||||
f"aws s3 rm s3://{s3_path}",
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# TODO: apparently should be placed into a separate file to be used only inside praktika
|
||||
# keeping this module free of Settings, Environment, etc. imports makes it easy to use externally
|
||||
@classmethod
|
||||
def copy_result_to_s3(cls, result, unlock=True):
|
||||
result.dump()
|
||||
env = _Environment.get()
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
|
||||
s3_path_full = f"{s3_path}/{Path(result.file_name()).name}"
|
||||
url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name())
|
||||
if env.PR_NUMBER:
|
||||
print("Duplicate Result for latest commit alias in PR")
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True)}"
|
||||
url = S3.copy_file_to_s3(s3_path=s3_path, local_path=result.file_name())
|
||||
if unlock:
|
||||
if not cls.unlock(s3_path_full):
|
||||
print(f"ERROR: File [{s3_path_full}] unlock failure")
|
||||
assert False # TODO: investigate
|
||||
return url
|
||||
|
||||
@classmethod
|
||||
def copy_result_from_s3(cls, local_path, lock=True):
|
||||
env = _Environment.get()
|
||||
file_name = Path(local_path).name
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}/{file_name}"
|
||||
if lock:
|
||||
cls.lock(s3_path)
|
||||
if not S3.copy_file_from_s3(s3_path=s3_path, local_path=local_path):
|
||||
print(f"ERROR: failed to cp file [{s3_path}] from s3")
|
||||
raise RuntimeError(f"failed to cp file [{s3_path}] from s3")
|
||||
|
||||
@classmethod
|
||||
def lock(cls, s3_path, level=0):
|
||||
assert level < 3, "Never"
|
||||
env = _Environment.get()
|
||||
s3_path_lock = s3_path + f".lock"
|
||||
file_path_lock = f"{Settings.TEMP_DIR}/{Path(s3_path_lock).name}"
|
||||
assert Shell.check(
|
||||
f"echo '''{env.JOB_NAME}''' > {file_path_lock}", verbose=True
|
||||
), "Never"
|
||||
|
||||
i = 20
|
||||
meta = S3.head_object(s3_path_lock)
|
||||
while meta:
|
||||
print(f"WARNING: Failed to acquire lock, meta [{meta}] - wait")
|
||||
i -= 5
|
||||
if i < 0:
|
||||
info = f"ERROR: lock acquire failure - unlock forcefully"
|
||||
print(info)
|
||||
env.add_info(info)
|
||||
break
|
||||
time.sleep(5)
|
||||
|
||||
metadata = {"job": Utils.to_base64(env.JOB_NAME)}
|
||||
S3.put(
|
||||
s3_path=s3_path_lock,
|
||||
local_path=file_path_lock,
|
||||
metadata=metadata,
|
||||
)
|
||||
time.sleep(1)
|
||||
obj = S3.head_object(s3_path_lock)
|
||||
if not obj or not obj.has_tags(tags=metadata):
|
||||
print(f"WARNING: locked by another job [{obj}]")
|
||||
env.add_info("S3 lock file failure")
|
||||
cls.lock(s3_path, level=level + 1)
|
||||
print("INFO: lock acquired")
|
||||
|
||||
@classmethod
|
||||
def unlock(cls, s3_path):
|
||||
s3_path_lock = s3_path + ".lock"
|
||||
env = _Environment.get()
|
||||
obj = S3.head_object(s3_path_lock)
|
||||
if not obj:
|
||||
print("ERROR: lock file is removed")
|
||||
assert False # investigate
|
||||
elif not obj.has_tags({"job": Utils.to_base64(env.JOB_NAME)}):
|
||||
print("ERROR: lock file was acquired by another job")
|
||||
assert False # investigate
|
||||
|
||||
if not S3.delete(s3_path_lock):
|
||||
print(f"ERROR: File [{s3_path_lock}] delete failure")
|
||||
print("INFO: lock released")
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def get_result_link(cls, result):
|
||||
env = _Environment.get()
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True if env.PR_NUMBER else False)}"
|
||||
return S3.get_link(s3_path=s3_path, local_path=result.file_name())
|
||||
|
||||
@classmethod
|
||||
def clean_latest_result(cls):
|
||||
env = _Environment.get()
|
||||
env.SHA = "latest"
|
||||
assert env.PR_NUMBER
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
|
||||
S3.clean_s3_directory(s3_path=s3_path)
|
||||
|
||||
@classmethod
|
||||
def _upload_file_to_s3(
|
||||
cls, local_file_path, upload_to_s3: bool, text: bool = False, s3_subprefix=""
|
||||
) -> str:
|
||||
if upload_to_s3:
|
||||
env = _Environment.get()
|
||||
s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}"
|
||||
if s3_subprefix:
|
||||
s3_subprefix.removeprefix("/").removesuffix("/")
|
||||
s3_path += f"/{s3_subprefix}"
|
||||
html_link = S3.copy_file_to_s3(
|
||||
s3_path=s3_path, local_path=local_file_path, text=text
|
||||
)
|
||||
return html_link
|
||||
return f"file://{Path(local_file_path).absolute()}"
|
||||
|
||||
@classmethod
|
||||
def upload_result_files_to_s3(cls, result):
|
||||
if result.results:
|
||||
for result_ in result.results:
|
||||
cls.upload_result_files_to_s3(result_)
|
||||
for file in result.files:
|
||||
if not Path(file).is_file():
|
||||
print(f"ERROR: Invalid file [{file}] in [{result.name}] - skip upload")
|
||||
result.info += f"\nWARNING: Result file [{file}] was not found"
|
||||
file_link = cls._upload_file_to_s3(file, upload_to_s3=False)
|
||||
else:
|
||||
is_text = False
|
||||
for text_file_suffix in Settings.TEXT_CONTENT_EXTENSIONS:
|
||||
if file.endswith(text_file_suffix):
|
||||
print(
|
||||
f"File [{file}] matches Settings.TEXT_CONTENT_EXTENSIONS [{Settings.TEXT_CONTENT_EXTENSIONS}] - add text attribute for s3 object"
|
||||
)
|
||||
is_text = True
|
||||
break
|
||||
file_link = cls._upload_file_to_s3(
|
||||
file,
|
||||
upload_to_s3=True,
|
||||
text=is_text,
|
||||
s3_subprefix=Utils.normalize_string(result.name),
|
||||
)
|
||||
result.links.append(file_link)
|
||||
if result.files:
|
||||
print(
|
||||
f"Result files [{result.files}] uploaded to s3 [{result.links[-len(result.files):]}] - clean files list"
|
||||
)
|
||||
result.files = []
|
||||
result.dump()
|
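As an illustration of the S3 helper above (bucket, prefix and file are placeholders; assumes the aws CLI is configured and the bucket has an entry in Settings.S3_BUCKET_TO_HTTP_ENDPOINT):

from praktika.s3 import S3

# upload a local report and get back an HTTP link to it
link = S3.copy_file_to_s3(
    s3_path="my-ci-bucket/reports/pr-123",
    local_path="./report.txt",
    text=True,  # adds --content-type text/plain to the aws s3 cp call
)
print(link)  # https://<bucket endpoint>/reports/pr-123/report.txt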
61
praktika/secret.py
Normal file
@ -0,0 +1,61 @@
|
||||
import dataclasses
|
||||
import os
|
||||
|
||||
from praktika.utils import Shell
|
||||
|
||||
|
||||
class Secret:
|
||||
class Type:
|
||||
AWS_SSM_VAR = "aws parameter"
|
||||
AWS_SSM_SECRET = "aws secret"
|
||||
GH_SECRET = "gh secret"
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Config:
|
||||
name: str
|
||||
type: str
|
||||
|
||||
def is_gh(self):
|
||||
return self.type == Secret.Type.GH_SECRET
|
||||
|
||||
def get_value(self):
|
||||
if self.type == Secret.Type.AWS_SSM_VAR:
|
||||
return self.get_aws_ssm_var()
|
||||
if self.type == Secret.Type.AWS_SSM_SECRET:
|
||||
return self.get_aws_ssm_secret()
|
||||
elif self.type == Secret.Type.GH_SECRET:
|
||||
return self.get_gh_secret()
|
||||
else:
|
||||
assert False, f"Not supported secret type, secret [{self}]"
|
||||
|
||||
def get_aws_ssm_var(self):
|
||||
res = Shell.get_output(
|
||||
f"aws ssm get-parameter --name {self.name} --with-decryption --output text --query Parameter.Value",
|
||||
)
|
||||
if not res:
|
||||
print(f"ERROR: Failed to get secret [{self.name}]")
|
||||
raise RuntimeError()
|
||||
return res
|
||||
|
||||
def get_aws_ssm_secret(self):
|
||||
name, secret_key_name = self.name, ""
|
||||
if "." in self.name:
|
||||
name, secret_key_name = self.name.split(".")
|
||||
cmd = f"aws secretsmanager get-secret-value --secret-id {name} --query SecretString --output text"
|
||||
if secret_key_name:
|
||||
cmd += f" | jq -r '.[\"{secret_key_name}\"]'"
|
||||
res = Shell.get_output(cmd, verbose=True)
|
||||
if not res:
|
||||
print(f"ERROR: Failed to get secret [{self.name}]")
|
||||
raise RuntimeError()
|
||||
return res
|
||||
|
||||
def get_gh_secret(self):
|
||||
res = os.getenv(f"{self.name}")
|
||||
if not res:
|
||||
print(f"ERROR: Failed to get secret [{self.name}]")
|
||||
raise RuntimeError()
|
||||
return res
|
||||
|
||||
def __repr__(self):
|
||||
return self.name
|
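A hedged example of declaring and reading one of the secret types above; the secret name is hypothetical and must already exist in AWS Secrets Manager:

from praktika.secret import Secret

# "ci-db.password" reads the "password" key from the JSON secret named "ci-db"
db_password = Secret.Config(name="ci-db.password", type=Secret.Type.AWS_SSM_SECRET)
# resolved lazily, only when a job actually needs the value
value = db_password.get_value()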
8
praktika/settings.py
Normal file
@ -0,0 +1,8 @@
|
||||
from praktika._settings import _Settings
|
||||
from praktika.mangle import _get_user_settings
|
||||
|
||||
Settings = _Settings()
|
||||
|
||||
user_settings = _get_user_settings()
|
||||
for setting, value in user_settings.items():
|
||||
Settings.__setattr__(setting, value)
|
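In practice jobs just import the merged Settings object; a tiny sketch (the attribute names are taken from their use elsewhere in this PR):

from praktika.settings import Settings

# defaults from _Settings, overridden by any user-provided settings
print(Settings.TEMP_DIR)
print(Settings.MAX_RETRIES_S3)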
597
praktika/utils.py
Normal file
@ -0,0 +1,597 @@
|
||||
import base64
|
||||
import dataclasses
|
||||
import glob
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, Iterator, List, Optional, Type, TypeVar, Union
|
||||
|
||||
from praktika._settings import _Settings
|
||||
|
||||
T = TypeVar("T", bound="Serializable")
|
||||
|
||||
|
||||
class MetaClasses:
|
||||
class WithIter(type):
|
||||
def __iter__(cls):
|
||||
return (v for k, v in cls.__dict__.items() if not k.startswith("_"))
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Serializable(ABC):
|
||||
@classmethod
|
||||
def to_dict(cls, obj):
|
||||
if dataclasses.is_dataclass(obj):
|
||||
return {k: cls.to_dict(v) for k, v in dataclasses.asdict(obj).items()}
|
||||
elif isinstance(obj, SimpleNamespace):
|
||||
return {k: cls.to_dict(v) for k, v in vars(obj).items()}
|
||||
elif isinstance(obj, list):
|
||||
return [cls.to_dict(i) for i in obj]
|
||||
elif isinstance(obj, dict):
|
||||
return {k: cls.to_dict(v) for k, v in obj.items()}
|
||||
else:
|
||||
return obj
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T:
|
||||
return cls(**obj)
|
||||
|
||||
@classmethod
|
||||
def from_fs(cls: Type[T], name) -> T:
|
||||
with open(cls.file_name_static(name), "r", encoding="utf8") as f:
|
||||
try:
|
||||
return cls.from_dict(json.load(f))
|
||||
except json.decoder.JSONDecodeError as ex:
|
||||
print(f"ERROR: failed to parse json, ex [{ex}]")
|
||||
print(f"JSON content [{cls.file_name_static(name)}]")
|
||||
Shell.check(f"cat {cls.file_name_static(name)}")
|
||||
raise ex
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def file_name_static(cls, name):
|
||||
pass
|
||||
|
||||
def file_name(self):
|
||||
return self.file_name_static(self.name)
|
||||
|
||||
def dump(self):
|
||||
with open(self.file_name(), "w", encoding="utf8") as f:
|
||||
json.dump(self.to_dict(self), f, indent=4)
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def exist(cls, name):
|
||||
return Path(cls.file_name_static(name)).is_file()
|
||||
|
||||
def to_json(self, pretty=False):
|
||||
return json.dumps(dataclasses.asdict(self), indent=4 if pretty else None)
|
||||
|
||||
|
||||
class ContextManager:
    @staticmethod
    @contextmanager
    def cd(to: Optional[Union[Path, str]] = None) -> Iterator[None]:
        """
        changes current working directory to @path or `git root` if @path is None
        :param to:
        :return:
        """
        if not to:
            try:
                to = Shell.get_output_or_raise("git rev-parse --show-toplevel")
            except:
                pass
        if not to:
            if Path(_Settings.DOCKER_WD).is_dir():
                to = _Settings.DOCKER_WD
        if not to:
            assert False, "FIX IT"
        assert to
        old_pwd = os.getcwd()
        os.chdir(to)
        try:
            yield
        finally:
            os.chdir(old_pwd)

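A minimal usage sketch of the context manager above (the paths in the commands are illustrative): it temporarily switches the working directory, preferring the git root, and always restores the caller's directory even if the body raises.

# run a command from the repository root, then restore the caller's cwd
with ContextManager.cd():
    Shell.check("ls ./ci", verbose=True)

# or cd into an explicit directory (path is illustrative)
with ContextManager.cd("/tmp"):
    print(Utils.cwd())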
class Shell:
    @classmethod
    def get_output_or_raise(cls, command, verbose=False):
        return cls.get_output(command, verbose=verbose, strict=True).strip()

    @classmethod
    def get_output(cls, command, strict=False, verbose=False):
        if verbose:
            print(f"Run command [{command}]")
        res = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        if res.stderr:
            print(f"WARNING: stderr: {res.stderr.strip()}")
        if strict and res.returncode != 0:
            raise RuntimeError(f"command failed with {res.returncode}")
        return res.stdout.strip()

    @classmethod
    def get_res_stdout_stderr(cls, command, verbose=True):
        if verbose:
            print(f"Run command [{command}]")
        res = subprocess.run(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        return res.returncode, res.stdout.strip(), res.stderr.strip()

    @classmethod
    def check(
        cls,
        command,
        log_file=None,
        strict=False,
        verbose=False,
        dry_run=False,
        stdin_str=None,
        timeout=None,
        retries=0,
        **kwargs,
    ):
        return (
            cls.run(
                command,
                log_file,
                strict,
                verbose,
                dry_run,
                stdin_str,
                retries=retries,
                timeout=timeout,
                **kwargs,
            )
            == 0
        )

    @classmethod
    def run(
        cls,
        command,
        log_file=None,
        strict=False,
        verbose=False,
        dry_run=False,
        stdin_str=None,
        timeout=None,
        retries=0,
        **kwargs,
    ):
        def _check_timeout(timeout, process) -> None:
            if not timeout:
                return
            time.sleep(timeout)
            print(
                f"WARNING: Timeout exceeded [{timeout}], sending SIGTERM to process group [{process.pid}]"
            )
            try:
                os.killpg(process.pid, signal.SIGTERM)
            except ProcessLookupError:
                print("Process already terminated.")
                return

            time_wait = 0
            wait_interval = 5

            # Wait for process to terminate
            while process.poll() is None and time_wait < 100:
                print("Waiting for process to exit...")
                time.sleep(wait_interval)
                time_wait += wait_interval

            # Force kill if still running
            if process.poll() is None:
                print(f"WARNING: Process still running after SIGTERM, sending SIGKILL")
                try:
                    os.killpg(process.pid, signal.SIGKILL)
                except ProcessLookupError:
                    print("Process already terminated.")

        # Dry-run
        if dry_run:
            print(f"Dry-run. Would run command [{command}]")
            return 0  # Return success for dry-run

        if verbose:
            print(f"Run command: [{command}]")

        log_file = log_file or "/dev/null"
        proc = None
        for retry in range(retries + 1):
            try:
                with open(log_file, "w") as log_fp:
                    proc = subprocess.Popen(
                        command,
                        shell=True,
                        stderr=subprocess.STDOUT,
                        stdout=subprocess.PIPE,
                        stdin=subprocess.PIPE if stdin_str else None,
                        universal_newlines=True,
                        start_new_session=True,  # Start a new process group for signal handling
                        bufsize=1,  # Line-buffered
                        errors="backslashreplace",
                        **kwargs,
                    )

                    # Start the timeout thread if specified
                    if timeout:
                        t = Thread(target=_check_timeout, args=(timeout, proc))
                        t.daemon = True
                        t.start()

                    # Write stdin if provided
                    if stdin_str:
                        proc.stdin.write(stdin_str)
                        proc.stdin.close()

                    # Process output in real-time
                    if proc.stdout:
                        for line in proc.stdout:
                            sys.stdout.write(line)
                            log_fp.write(line)

                    proc.wait()  # Wait for the process to finish

                    if proc.returncode == 0:
                        break  # Exit retry loop if success
                    else:
                        if verbose:
                            print(
                                f"ERROR: command [{command}] failed, exit code: {proc.returncode}, retry: {retry}/{retries}"
                            )
            except Exception as e:
                if verbose:
                    print(
                        f"ERROR: command failed, exception: {e}, retry: {retry}/{retries}"
                    )
                if proc:
                    proc.kill()

        # Handle strict mode (ensure process success or fail)
        if strict:
            assert (
                proc and proc.returncode == 0
            ), f"Command failed with return code {proc.returncode}"

        return proc.returncode if proc else 1  # Return 1 if process never started

    @classmethod
    def run_async(
        cls,
        command,
        stdin_str=None,
        verbose=False,
        suppress_output=False,
        **kwargs,
    ):
        if verbose:
            print(f"Run command in background [{command}]")
        proc = subprocess.Popen(
            command,
            shell=True,
            stderr=subprocess.STDOUT if not suppress_output else subprocess.DEVNULL,
            stdout=subprocess.PIPE if not suppress_output else subprocess.DEVNULL,
            stdin=subprocess.PIPE if stdin_str else None,
            universal_newlines=True,
            start_new_session=True,
            bufsize=1,
            errors="backslashreplace",
            **kwargs,
        )
        if proc.stdout:
            for line in proc.stdout:
                print(line, end="")
        return proc

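A brief usage sketch of the Shell helper above (the commands and file names are placeholders): check() returns a boolean, run() returns the exit code and can tee output to a log file with retries and a timeout, and get_output() captures stdout.

ok = Shell.check("echo hello", verbose=True)             # True on zero exit code
branch = Shell.get_output("git rev-parse --abbrev-ref HEAD", strict=True)
code = Shell.run(
    "flaky_command.sh",           # placeholder command
    log_file="/tmp/flaky.log",    # output is echoed and written to the log
    retries=2,
    timeout=300,                  # seconds; the process group gets SIGTERM, then SIGKILL
)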
class Utils:
    @staticmethod
    def terminate_process_group(pid, force=False):
        if not force:
            os.killpg(os.getpgid(pid), signal.SIGTERM)
        else:
            os.killpg(os.getpgid(pid), signal.SIGKILL)

    @staticmethod
    def set_env(key, val):
        os.environ[key] = val

    @staticmethod
    def print_formatted_error(error_message, stdout="", stderr=""):
        stdout_lines = stdout.splitlines() if stdout else []
        stderr_lines = stderr.splitlines() if stderr else []
        print(f"ERROR: {error_message}")
        if stdout_lines:
            print(" Out:")
            for line in stdout_lines:
                print(f" | {line}")
        if stderr_lines:
            print(" Err:")
            for line in stderr_lines:
                print(f" | {line}")

    @staticmethod
    def sleep(seconds):
        time.sleep(seconds)

    @staticmethod
    def cwd():
        return Path.cwd()

    @staticmethod
    def cpu_count():
        return multiprocessing.cpu_count()

    @staticmethod
    def raise_with_error(error_message, stdout="", stderr=""):
        Utils.print_formatted_error(error_message, stdout, stderr)
        raise

    @staticmethod
    def timestamp():
        return datetime.utcnow().timestamp()

    @staticmethod
    def timestamp_to_str(timestamp):
        return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def get_failed_tests_number(description: str) -> Optional[int]:
        description = description.lower()

        pattern = r"fail:\s*(\d+)\s*(?=,|$)"
        match = re.search(pattern, description)
        if match:
            return int(match.group(1))
        return None

    @staticmethod
    def is_killed_with_oom():
        if Shell.check(
            "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
        ):
            return True
        return False

    @staticmethod
    def clear_dmesg():
        Shell.check("sudo dmesg --clear", verbose=True)

    @staticmethod
    def to_base64(value):
        assert isinstance(value, str), f"TODO: not supported for {type(value)}"
        string_bytes = value.encode("utf-8")
        base64_bytes = base64.b64encode(string_bytes)
        base64_string = base64_bytes.decode("utf-8")
        return base64_string

    @staticmethod
    def is_hex(s):
        try:
            int(s, 16)
            return True
        except ValueError:
            return False

    @staticmethod
    def normalize_string(string: str) -> str:
        res = string.lower()
        for r in (
            (" ", "_"),
            ("(", ""),
            (")", ""),
            ("{", ""),
            ("}", ""),
            ("'", ""),
            ("[", ""),
            ("]", ""),
            (",", ""),
            ("/", "_"),
            ("-", "_"),
            (":", ""),
            ('"', ""),
        ):
            res = res.replace(*r)
        return res

    @staticmethod
    def traverse_path(path, file_suffixes=None, sorted=False, not_exists_ok=False):
        res = []

        def is_valid_file(file):
            if file_suffixes is None:
                return True
            return any(file.endswith(suffix) for suffix in file_suffixes)

        if os.path.isfile(path):
            if is_valid_file(path):
                res.append(path)
        elif os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for file in files:
                    full_path = os.path.join(root, file)
                    if is_valid_file(full_path):
                        res.append(full_path)
        elif "*" in str(path):
            res.extend(
                [
                    f
                    for f in glob.glob(path, recursive=True)
                    if os.path.isfile(f) and is_valid_file(f)
                ]
            )
        else:
            if not_exists_ok:
                pass
            else:
                assert False, f"File does not exist or not valid [{path}]"

        if sorted:
            res.sort(reverse=True)

        return res

    @classmethod
    def traverse_paths(
        cls,
        include_paths,
        exclude_paths,
        file_suffixes=None,
        sorted=False,
        not_exists_ok=False,
    ) -> List["str"]:
        included_files_ = set()
        for path in include_paths:
            included_files_.update(cls.traverse_path(path, file_suffixes=file_suffixes))

        excluded_files = set()
        for path in exclude_paths:
            res = cls.traverse_path(path, not_exists_ok=not_exists_ok)
            if not res:
                print(
                    f"WARNING: Utils.traverse_paths excluded 0 files by path [{path}] in exclude_paths"
                )
            else:
                excluded_files.update(res)
        res = [f for f in included_files_ if f not in excluded_files]
        if sorted:
            res.sort(reverse=True)
        return res

    @classmethod
    def add_to_PATH(cls, path):
        path_cur = os.getenv("PATH", "")
        if path_cur:
            path += ":" + path_cur
        os.environ["PATH"] = path

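For orientation, a small sketch of the Utils helpers above (the paths and strings are illustrative): normalize_string() produces the lowercase, punctuation-free identifiers that the YAML generator later in this diff uses for job names, and traverse_paths() expands include/exclude path lists into a concrete file list.

assert Utils.normalize_string("Stateless tests (release)") == "stateless_tests_release"

files = Utils.traverse_paths(
    include_paths=["./ci"],      # illustrative paths
    exclude_paths=["./ci/tmp"],
    file_suffixes=[".py"],
    sorted=True,
)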
class Stopwatch:
    def __init__(self):
        self.start_time = datetime.utcnow().timestamp()

    @property
    def duration(self) -> float:
        return datetime.utcnow().timestamp() - self.start_time


class TeePopen:
    def __init__(
        self,
        command: str,
        log_file: Union[str, Path] = "",
        env: Optional[dict] = None,
        timeout: Optional[int] = None,
    ):
        self.command = command
        self.log_file_name = log_file
        self.log_file = None
        self.env = env or os.environ.copy()
        self.process = None  # type: Optional[subprocess.Popen]
        self.timeout = timeout
        self.timeout_exceeded = False
        self.terminated_by_sigterm = False
        self.terminated_by_sigkill = False

    def _check_timeout(self) -> None:
        if self.timeout is None:
            return
        time.sleep(self.timeout)
        print(
            f"WARNING: Timeout exceeded [{self.timeout}], send SIGTERM to [{self.process.pid}] and give a chance for graceful termination"
        )
        self.send_signal(signal.SIGTERM)
        time_wait = 0
        self.terminated_by_sigterm = True
        self.timeout_exceeded = True
        while self.process.poll() is None and time_wait < 100:
            print("wait...")
            wait = 5
            time.sleep(wait)
            time_wait += wait
        while self.process.poll() is None:
            print(f"WARNING: Still running, send SIGKILL to [{self.process.pid}]")
            self.send_signal(signal.SIGKILL)
            self.terminated_by_sigkill = True
            time.sleep(2)

    def __enter__(self) -> "TeePopen":
        if self.log_file_name:
            self.log_file = open(self.log_file_name, "w", encoding="utf-8")
        self.process = subprocess.Popen(
            self.command,
            shell=True,
            universal_newlines=True,
            env=self.env,
            start_new_session=True,  # signal will be sent to all children
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            bufsize=1,
            errors="backslashreplace",
        )
        time.sleep(1)
        print(f"Subprocess started, pid [{self.process.pid}]")
        if self.timeout is not None and self.timeout > 0:
            t = Thread(target=self._check_timeout)
            t.daemon = True  # does not block the program from exit
            t.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.wait()
        if self.log_file:
            self.log_file.close()

    def wait(self) -> int:
        if self.process.stdout is not None:
            for line in self.process.stdout:
                sys.stdout.write(line)
                if self.log_file:
                    self.log_file.write(line)

        return self.process.wait()

    def poll(self):
        return self.process.poll()

    def send_signal(self, signal_num):
        os.killpg(self.process.pid, signal_num)

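A short, hedged example of how TeePopen above is typically driven (the command and log path are placeholders): used as a context manager, it streams the child's combined output to stdout and to the log file, and wait() returns the exit code.

with TeePopen("make -j$(nproc)", log_file="/tmp/build.log", timeout=3600) as tp:
    exit_code = tp.wait()
    if tp.timeout_exceeded:
        print("WARNING: build timed out and was terminated")
print(f"Build finished with code {exit_code}")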
if __name__ == "__main__":

    @dataclasses.dataclass
    class Test(MetaClasses.Serializable):
        name: str

        @staticmethod
        def file_name_static(name):
            return f"/tmp/{Utils.normalize_string(name)}.json"

    Test(name="dsada").dump()
    t = Test.from_fs("dsada")
    print(t)
208 praktika/validator.py Normal file
@ -0,0 +1,208 @@
|
||||
import glob
|
||||
import sys
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
|
||||
from praktika import Workflow
|
||||
from praktika._settings import GHRunners
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import ContextManager
|
||||
|
||||
|
||||
class Validator:
|
||||
@classmethod
|
||||
def validate(cls):
|
||||
print("---Start validating Pipeline and settings---")
|
||||
workflows = _get_workflows()
|
||||
for workflow in workflows:
|
||||
print(f"Validating workflow [{workflow.name}]")
|
||||
|
||||
cls.validate_file_paths_in_run_command(workflow)
|
||||
cls.validate_file_paths_in_digest_configs(workflow)
|
||||
cls.validate_requirements_txt_files(workflow)
|
||||
cls.validate_dockers(workflow)
|
||||
|
||||
if workflow.artifacts:
|
||||
for artifact in workflow.artifacts:
|
||||
if artifact.is_s3_artifact():
|
||||
assert (
|
||||
Settings.S3_ARTIFACT_PATH
|
||||
), "Provide S3_ARTIFACT_PATH setting in any .py file in ./ci/settings/* to be able to use s3 for artifacts"
|
||||
|
||||
for job in workflow.jobs:
|
||||
if job.requires and workflow.artifacts:
|
||||
for require in job.requires:
|
||||
if (
|
||||
require in workflow.artifacts
|
||||
and workflow.artifacts[require].is_s3_artifact()
|
||||
):
|
||||
assert not any(
|
||||
[r in GHRunners for r in job.runs_on]
|
||||
), f"GH runners [{job.name}:{job.runs_on}] must not be used with S3 as artifact storage"
|
||||
|
||||
if job.allow_merge_on_failure:
|
||||
assert (
|
||||
workflow.enable_merge_ready_status
|
||||
), f"Job property allow_merge_on_failure must be used only with enabled workflow.enable_merge_ready_status, workflow [{workflow.name}], job [{job.name}]"
|
||||
|
||||
if workflow.enable_cache:
|
||||
assert (
|
||||
Settings.CI_CONFIG_RUNS_ON
|
||||
), f"Runner label to run workflow config job must be provided via CACHE_CONFIG_RUNS_ON setting if enable_cache=True, workflow [{workflow.name}]"
|
||||
|
||||
assert (
|
||||
Settings.CACHE_S3_PATH
|
||||
), f"CACHE_S3_PATH Setting must be defined if enable_cache=True, workflow [{workflow.name}]"
|
||||
|
||||
if workflow.dockers:
|
||||
cls.evaluate_check(
|
||||
Settings.DOCKER_BUILD_RUNS_ON,
|
||||
f"DOCKER_BUILD_RUNS_ON settings must be defined if workflow has dockers",
|
||||
workflow_name=workflow.name,
|
||||
)
|
||||
|
||||
if workflow.enable_report:
|
||||
assert (
|
||||
Settings.HTML_S3_PATH
|
||||
), f"HTML_S3_PATH Setting must be defined if enable_html=True, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.S3_BUCKET_TO_HTTP_ENDPOINT
|
||||
), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must be defined if enable_html=True, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.HTML_S3_PATH.split("/")[0]
|
||||
in Settings.S3_BUCKET_TO_HTTP_ENDPOINT
|
||||
), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must include bucket name [{Settings.HTML_S3_PATH}] from HTML_S3_PATH, workflow [{workflow.name}]"
|
||||
|
||||
if workflow.enable_cache:
|
||||
for artifact in workflow.artifacts or []:
|
||||
assert (
|
||||
artifact.is_s3_artifact()
|
||||
), f"All artifacts must be of S3 type if enable_cache|enable_html=True, artifact [{artifact.name}], type [{artifact.type}], workflow [{workflow.name}]"
|
||||
|
||||
if workflow.dockers:
|
||||
assert (
|
||||
Settings.DOCKERHUB_USERNAME
|
||||
), f"Settings.DOCKERHUB_USERNAME must be provided if workflow has dockers, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.DOCKERHUB_SECRET
|
||||
), f"Settings.DOCKERHUB_SECRET must be provided if workflow has dockers, workflow [{workflow.name}]"
|
||||
assert workflow.get_secret(
|
||||
Settings.DOCKERHUB_SECRET
|
||||
), f"Secret [{Settings.DOCKERHUB_SECRET}] must have configuration in workflow.secrets, workflow [{workflow.name}]"
|
||||
|
||||
if (
|
||||
workflow.enable_cache
|
||||
or workflow.enable_report
|
||||
or workflow.enable_merge_ready_status
|
||||
):
|
||||
for job in workflow.jobs:
|
||||
assert not any(
|
||||
job in ("ubuntu-latest",) for job in job.runs_on
|
||||
), f"GitHub Runners must not be used for workflow with enabled: workflow.enable_cache, workflow.enable_html or workflow.enable_merge_ready_status as s3 access is required, workflow [{workflow.name}], job [{job.name}]"
|
||||
|
||||
if workflow.enable_cidb:
|
||||
assert (
|
||||
Settings.SECRET_CI_DB_URL
|
||||
), f"Settings.CI_DB_URL_SECRET must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.SECRET_CI_DB_PASSWORD
|
||||
), f"Settings.CI_DB_PASSWORD_SECRET must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.CI_DB_DB_NAME
|
||||
), f"Settings.CI_DB_DB_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
|
||||
assert (
|
||||
Settings.CI_DB_TABLE_NAME
|
||||
), f"Settings.CI_DB_TABLE_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
|
||||
|
||||
@classmethod
|
||||
def validate_file_paths_in_run_command(cls, workflow: Workflow.Config) -> None:
|
||||
if not Settings.VALIDATE_FILE_PATHS:
|
||||
return
|
||||
with ContextManager.cd():
|
||||
for job in workflow.jobs:
|
||||
run_command = job.command
|
||||
command_parts = run_command.split(" ")
|
||||
for part in command_parts:
|
||||
if ">" in part:
|
||||
return
|
||||
if "/" in part:
|
||||
assert (
|
||||
Path(part).is_file() or Path(part).is_dir()
|
||||
), f"Apparently run command [{run_command}] for job [{job}] has invalid path [{part}]. Setting to disable check: VALIDATE_FILE_PATHS"
|
||||
|
||||
@classmethod
|
||||
def validate_file_paths_in_digest_configs(cls, workflow: Workflow.Config) -> None:
|
||||
if not Settings.VALIDATE_FILE_PATHS:
|
||||
return
|
||||
with ContextManager.cd():
|
||||
for job in workflow.jobs:
|
||||
if not job.digest_config:
|
||||
continue
|
||||
for include_path in chain(
|
||||
job.digest_config.include_paths, job.digest_config.exclude_paths
|
||||
):
|
||||
if "*" in include_path:
|
||||
assert glob.glob(
|
||||
include_path, recursive=True
|
||||
), f"Apparently file glob [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"
|
||||
else:
|
||||
assert (
|
||||
Path(include_path).is_file() or Path(include_path).is_dir()
|
||||
), f"Apparently file path [{include_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"
|
||||
|
||||
@classmethod
|
||||
def validate_requirements_txt_files(cls, workflow: Workflow.Config) -> None:
|
||||
with ContextManager.cd():
|
||||
for job in workflow.jobs:
|
||||
if job.job_requirements:
|
||||
if job.job_requirements.python_requirements_txt:
|
||||
path = Path(job.job_requirements.python_requirements_txt)
|
||||
message = f"File with py requirement [{path}] does not exist"
|
||||
if job.name in (
|
||||
Settings.DOCKER_BUILD_JOB_NAME,
|
||||
Settings.CI_CONFIG_JOB_NAME,
|
||||
Settings.FINISH_WORKFLOW_JOB_NAME,
|
||||
):
|
||||
message += '\n If all requirements already installed on your runners - add setting INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS""'
|
||||
message += "\n If requirements needs to be installed - add requirements file (Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS):"
|
||||
message += "\n echo jwt==1.3.1 > ./ci/requirements.txt"
|
||||
message += (
|
||||
"\n echo requests==2.32.3 >> ./ci/requirements.txt"
|
||||
)
|
||||
message += "\n echo https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl >> ./ci/requirements.txt"
|
||||
cls.evaluate_check(
|
||||
path.is_file(), message, job.name, workflow.name
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def validate_dockers(cls, workflow: Workflow.Config):
|
||||
names = []
|
||||
for docker in workflow.dockers:
|
||||
cls.evaluate_check(
|
||||
docker.name not in names,
|
||||
f"Non uniq docker name [{docker.name}]",
|
||||
workflow_name=workflow.name,
|
||||
)
|
||||
names.append(docker.name)
|
||||
for docker in workflow.dockers:
|
||||
for docker_dep in docker.depends_on:
|
||||
cls.evaluate_check(
|
||||
docker_dep in names,
|
||||
f"Docker [{docker.name}] has invalid dependency [{docker_dep}]",
|
||||
workflow_name=workflow.name,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def evaluate_check(cls, check_ok, message, workflow_name, job_name=""):
|
||||
message = message.split("\n")
|
||||
messages = [message] if not isinstance(message, list) else message
|
||||
if check_ok:
|
||||
return
|
||||
else:
|
||||
print(
|
||||
f"ERROR: Config validation failed: workflow [{workflow_name}], job [{job_name}]:"
|
||||
)
|
||||
for message in messages:
|
||||
print(" || " + message)
|
||||
sys.exit(1)
|
1 praktika/version.py Normal file
@ -0,0 +1 @@
VERSION = 1
68 praktika/workflow.py Normal file
@ -0,0 +1,68 @@
from dataclasses import dataclass, field
from typing import List, Optional

from praktika import Artifact, Job
from praktika.docker import Docker
from praktika.secret import Secret
from praktika.utils import Utils


class Workflow:
    class Event:
        PULL_REQUEST = "pull_request"
        PUSH = "push"

    @dataclass
    class Config:
        """
        branches - List of branch names or patterns, for push trigger only
        base_branches - List of base branches (target branch), for pull_request trigger only
        """

        name: str
        event: str
        jobs: List[Job.Config]
        branches: List[str] = field(default_factory=list)
        base_branches: List[str] = field(default_factory=list)
        artifacts: List[Artifact.Config] = field(default_factory=list)
        dockers: List[Docker.Config] = field(default_factory=list)
        secrets: List[Secret.Config] = field(default_factory=list)
        enable_cache: bool = False
        enable_report: bool = False
        enable_merge_ready_status: bool = False
        enable_cidb: bool = False

        def is_event_pull_request(self):
            return self.event == Workflow.Event.PULL_REQUEST

        def is_event_push(self):
            return self.event == Workflow.Event.PUSH

        def get_job(self, name):
            job = self.find_job(name)
            if not job:
                Utils.raise_with_error(
                    f"Failed to find job [{name}], workflow [{self.name}]"
                )
            return job

        def find_job(self, name, lazy=False):
            name = str(name)
            for job in self.jobs:
                if lazy:
                    if name.lower() in job.name.lower():
                        return job
                else:
                    if job.name == name:
                        return job
            return None

        def get_secret(self, name) -> Optional[Secret.Config]:
            name = str(name)
            names = []
            for secret in self.secrets:
                if secret.name == name:
                    return secret
                names.append(secret.name)
            print(f"ERROR: Failed to find secret [{name}], workflow secrets [{names}]")
            raise
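A hedged sketch of the lookup helpers above (the job name and runner label are illustrative): find_job() supports lazy, case-insensitive substring matching, while get_secret() is what the validator elsewhere in this diff uses to confirm that a referenced secret is actually declared.

wf = Workflow.Config(
    name="PR",
    event=Workflow.Event.PULL_REQUEST,
    jobs=[Job.Config(name="Style check", runs_on=["style-checker"], command="python3 ci/style.py")],
)
job = wf.find_job("style", lazy=True)   # case-insensitive substring match
assert job is not None
# wf.get_secret("dockerhub_robot_password") would print an error and raise,
# since no secrets are declared in this Config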
349
praktika/yaml_generator.py
Normal file
349
praktika/yaml_generator.py
Normal file
@ -0,0 +1,349 @@
|
||||
import dataclasses
|
||||
from typing import List
|
||||
|
||||
from praktika import Artifact, Job, Workflow
|
||||
from praktika.mangle import _get_workflows
|
||||
from praktika.parser import WorkflowConfigParser
|
||||
from praktika.runtime import RunConfig
|
||||
from praktika.settings import Settings
|
||||
from praktika.utils import ContextManager, Shell, Utils
|
||||
|
||||
|
||||
class YamlGenerator:
|
||||
class Templates:
|
||||
TEMPLATE_PULL_REQUEST_0 = """\
|
||||
# generated by praktika
|
||||
|
||||
name: {NAME}
|
||||
|
||||
on:
|
||||
{EVENT}:
|
||||
branches: [{BRANCHES}]
|
||||
|
||||
# Cancel the previous wf run in PRs.
|
||||
concurrency:
|
||||
group: ${{{{{{{{ github.workflow }}}}}}}}-${{{{{{{{ github.ref }}}}}}}}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Force the stdout and stderr streams to be unbuffered
|
||||
PYTHONUNBUFFERED: 1
|
||||
GH_TOKEN: ${{{{{{{{ github.token }}}}}}}}
|
||||
|
||||
# Allow updating GH commit statuses and PR comments to post an actual job reports link
|
||||
permissions: write-all
|
||||
|
||||
jobs:
|
||||
{JOBS}\
|
||||
"""
|
||||
|
||||
TEMPLATE_CALLABLE_WORKFLOW = """\
|
||||
# generated by praktika
|
||||
|
||||
name: {NAME}
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
config:
|
||||
type: string
|
||||
required: false
|
||||
default: ''
|
||||
secrets:
|
||||
{SECRETS}
|
||||
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
|
||||
jobs:
|
||||
{JOBS}\
|
||||
"""
|
||||
|
||||
TEMPLATE_SECRET_CONFIG = """\
|
||||
{SECRET_NAME}:
|
||||
required: true
|
||||
"""
|
||||
|
||||
TEMPLATE_MATRIX = """
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
params: {PARAMS_LIST}\
|
||||
"""
|
||||
|
||||
TEMPLATE_JOB_0 = """
|
||||
{JOB_NAME_NORMALIZED}:
|
||||
runs-on: [{RUNS_ON}]
|
||||
needs: [{NEEDS}]{IF_EXPRESSION}
|
||||
name: "{JOB_NAME_GH}"
|
||||
outputs:
|
||||
data: ${{{{ steps.run.outputs.DATA }}}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
{JOB_ADDONS}
|
||||
- name: Prepare env script
|
||||
run: |
|
||||
export PYTHONPATH=.:$PYTHONPATH
|
||||
cat > {ENV_SETUP_SCRIPT} << 'ENV_SETUP_SCRIPT_EOF'
|
||||
{SETUP_ENVS}
|
||||
cat > {WORKFLOW_CONFIG_FILE} << 'EOF'
|
||||
${{{{ needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data }}}}
|
||||
EOF
|
||||
cat > {WORKFLOW_STATUS_FILE} << 'EOF'
|
||||
${{{{ toJson(needs) }}}}
|
||||
EOF
|
||||
ENV_SETUP_SCRIPT_EOF
|
||||
|
||||
rm -rf {INPUT_DIR} {OUTPUT_DIR} {TEMP_DIR}
|
||||
mkdir -p {TEMP_DIR} {INPUT_DIR} {OUTPUT_DIR}
|
||||
{DOWNLOADS_GITHUB}
|
||||
- name: Run
|
||||
id: run
|
||||
run: |
|
||||
set -o pipefail
|
||||
{PYTHON} -m praktika run --job '''{JOB_NAME}''' --workflow "{WORKFLOW_NAME}" --ci |& tee {RUN_LOG}
|
||||
{UPLOADS_GITHUB}\
|
||||
"""
|
||||
|
||||
TEMPLATE_SETUP_ENV_SECRETS = """\
|
||||
export {SECRET_NAME}=$(cat<<'EOF'
|
||||
${{{{ secrets.{SECRET_NAME} }}}}
|
||||
EOF
|
||||
)\
|
||||
"""
|
||||
|
||||
TEMPLATE_PY_INSTALL = """
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: {PYTHON_VERSION}
|
||||
"""
|
||||
|
||||
TEMPLATE_PY_WITH_REQUIREMENTS = """
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get update && sudo apt install -y python3-pip
|
||||
# TODO: --break-system-packages? otherwise ubuntu's apt/apt-get complains
|
||||
{PYTHON} -m pip install --upgrade pip --break-system-packages
|
||||
{PIP} install -r {REQUIREMENT_PATH} --break-system-packages
|
||||
"""
|
||||
|
||||
TEMPLATE_GH_UPLOAD = """
|
||||
- name: Upload artifact {NAME}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: {NAME}
|
||||
path: {PATH}
|
||||
"""
|
||||
|
||||
TEMPLATE_GH_DOWNLOAD = """
|
||||
- name: Download artifact {NAME}
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: {NAME}
|
||||
path: {PATH}
|
||||
"""
|
||||
|
||||
TEMPLATE_IF_EXPRESSION = """
|
||||
if: ${{{{ !failure() && !cancelled() && !contains(fromJson(needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data).cache_success_base64, '{JOB_NAME_BASE64}') }}}}\
|
||||
"""
|
||||
|
||||
TEMPLATE_IF_EXPRESSION_SKIPPED_OR_SUCCESS = """
|
||||
if: ${{ !failure() && !cancelled() }}\
|
||||
"""
|
||||
|
||||
TEMPLATE_IF_EXPRESSION_NOT_CANCELLED = """
|
||||
if: ${{ !cancelled() }}\
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.py_workflows = [] # type: List[Workflow.Config]
|
||||
|
||||
@classmethod
|
||||
def _get_workflow_file_name(cls, workflow_name):
|
||||
return f"{Settings.WORKFLOW_PATH_PREFIX}/{Utils.normalize_string(workflow_name)}.yaml"
|
||||
|
||||
def generate(self, workflow_file="", workflow_config=None):
|
||||
print("---Start generating yaml pipelines---")
|
||||
if workflow_config:
|
||||
self.py_workflows = [workflow_config]
|
||||
else:
|
||||
self.py_workflows = _get_workflows(file=workflow_file)
|
||||
assert self.py_workflows
|
||||
for workflow_config in self.py_workflows:
|
||||
print(f"Generate workflow [{workflow_config.name}]")
|
||||
parser = WorkflowConfigParser(workflow_config).parse()
|
||||
if (
|
||||
workflow_config.is_event_pull_request()
|
||||
or workflow_config.is_event_push()
|
||||
):
|
||||
yaml_workflow_str = PullRequestPushYamlGen(parser).generate()
|
||||
else:
|
||||
assert (
|
||||
False
|
||||
), f"Workflow event not yet supported [{workflow_config.event}]"
|
||||
|
||||
with ContextManager.cd():
|
||||
with open(self._get_workflow_file_name(workflow_config.name), "w") as f:
|
||||
f.write(yaml_workflow_str)
|
||||
|
||||
with ContextManager.cd():
|
||||
Shell.check("git add ./.github/workflows/*.yaml")
|
||||
|
||||
|
||||
class PullRequestPushYamlGen:
|
||||
def __init__(self, parser: WorkflowConfigParser):
|
||||
self.workflow_config = parser.workflow_yaml_config
|
||||
self.parser = parser
|
||||
|
||||
def generate(self):
|
||||
job_items = []
|
||||
for i, job in enumerate(self.workflow_config.jobs):
|
||||
job_name_normalized = Utils.normalize_string(job.name)
|
||||
needs = ", ".join(map(Utils.normalize_string, job.needs))
|
||||
job_name = job.name
|
||||
job_addons = []
|
||||
for addon in job.addons:
|
||||
if addon.install_python:
|
||||
job_addons.append(
|
||||
YamlGenerator.Templates.TEMPLATE_PY_INSTALL.format(
|
||||
PYTHON_VERSION=Settings.PYTHON_VERSION
|
||||
)
|
||||
)
|
||||
if addon.requirements_txt_path:
|
||||
job_addons.append(
|
||||
YamlGenerator.Templates.TEMPLATE_PY_WITH_REQUIREMENTS.format(
|
||||
PYTHON=Settings.PYTHON_INTERPRETER,
|
||||
PIP=Settings.PYTHON_PACKET_MANAGER,
|
||||
PYTHON_VERSION=Settings.PYTHON_VERSION,
|
||||
REQUIREMENT_PATH=addon.requirements_txt_path,
|
||||
)
|
||||
)
|
||||
uploads_github = []
|
||||
for artifact in job.artifacts_gh_provides:
|
||||
uploads_github.append(
|
||||
YamlGenerator.Templates.TEMPLATE_GH_UPLOAD.format(
|
||||
NAME=artifact.name, PATH=artifact.path
|
||||
)
|
||||
)
|
||||
downloads_github = []
|
||||
for artifact in job.artifacts_gh_requires:
|
||||
downloads_github.append(
|
||||
YamlGenerator.Templates.TEMPLATE_GH_DOWNLOAD.format(
|
||||
NAME=artifact.name, PATH=Settings.INPUT_DIR
|
||||
)
|
||||
)
|
||||
|
||||
config_job_name_normalized = Utils.normalize_string(
|
||||
Settings.CI_CONFIG_JOB_NAME
|
||||
)
|
||||
|
||||
if_expression = ""
|
||||
if (
|
||||
self.workflow_config.enable_cache
|
||||
and job_name_normalized != config_job_name_normalized
|
||||
):
|
||||
if_expression = YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION.format(
|
||||
WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized,
|
||||
JOB_NAME_BASE64=Utils.to_base64(job_name),
|
||||
)
|
||||
if job.run_unless_cancelled:
|
||||
if_expression = (
|
||||
YamlGenerator.Templates.TEMPLATE_IF_EXPRESSION_NOT_CANCELLED
|
||||
)
|
||||
|
||||
secrets_envs = []
|
||||
for secret in self.workflow_config.secret_names_gh:
|
||||
secrets_envs.append(
|
||||
YamlGenerator.Templates.TEMPLATE_SETUP_ENV_SECRETS.format(
|
||||
SECRET_NAME=secret
|
||||
)
|
||||
)
|
||||
|
||||
job_item = YamlGenerator.Templates.TEMPLATE_JOB_0.format(
|
||||
JOB_NAME_NORMALIZED=job_name_normalized,
|
||||
WORKFLOW_CONFIG_JOB_NAME=config_job_name_normalized,
|
||||
IF_EXPRESSION=if_expression,
|
||||
RUNS_ON=", ".join(job.runs_on),
|
||||
NEEDS=needs,
|
||||
JOB_NAME_GH=job_name.replace('"', '\\"'),
|
||||
JOB_NAME=job_name.replace(
|
||||
"'", "'\\''"
|
||||
), # ' must be escaped so that yaml commands are properly parsed
|
||||
WORKFLOW_NAME=self.workflow_config.name,
|
||||
ENV_SETUP_SCRIPT=Settings.ENV_SETUP_SCRIPT,
|
||||
SETUP_ENVS="\n".join(secrets_envs),
|
||||
WORKFLOW_CONFIG_FILE=RunConfig.file_name_static(
|
||||
self.workflow_config.name
|
||||
),
|
||||
JOB_ADDONS="".join(job_addons),
|
||||
DOWNLOADS_GITHUB="\n".join(downloads_github),
|
||||
UPLOADS_GITHUB="\n".join(uploads_github),
|
||||
RUN_LOG=Settings.RUN_LOG,
|
||||
PYTHON=Settings.PYTHON_INTERPRETER,
|
||||
WORKFLOW_STATUS_FILE=Settings.WORKFLOW_STATUS_FILE,
|
||||
TEMP_DIR=Settings.TEMP_DIR,
|
||||
INPUT_DIR=Settings.INPUT_DIR,
|
||||
OUTPUT_DIR=Settings.OUTPUT_DIR,
|
||||
)
|
||||
job_items.append(job_item)
|
||||
|
||||
base_template = YamlGenerator.Templates.TEMPLATE_PULL_REQUEST_0
|
||||
template_1 = base_template.strip().format(
|
||||
NAME=self.workflow_config.name,
|
||||
BRANCHES=", ".join(
|
||||
[f"'{branch}'" for branch in self.workflow_config.branches]
|
||||
),
|
||||
EVENT=self.workflow_config.event,
|
||||
JOBS="{}" * len(job_items),
|
||||
)
|
||||
res = template_1.format(*job_items)
|
||||
|
||||
return res
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class AuxConfig:
|
||||
# defines aux step to install dependencies
|
||||
addon: Job.Requirements
|
||||
# defines aux step(s) to upload GH artifacts
|
||||
uploads_gh: List[Artifact.Config]
|
||||
# defines aux step(s) to download GH artifacts
|
||||
downloads_gh: List[Artifact.Config]
|
||||
|
||||
def get_aux_workflow_name(self):
|
||||
suffix = ""
|
||||
if self.addon.python_requirements_txt:
|
||||
suffix += "_py"
|
||||
for _ in self.uploads_gh:
|
||||
suffix += "_uplgh"
|
||||
for _ in self.downloads_gh:
|
||||
suffix += "_dnlgh"
|
||||
return f"{Settings.WORKFLOW_PATH_PREFIX}/aux_job{suffix}.yaml"
|
||||
|
||||
def get_aux_workflow_input(self):
|
||||
res = ""
|
||||
if self.addon.python_requirements_txt:
|
||||
res += f" requirements_txt: {self.addon.python_requirements_txt}"
|
||||
return res
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
WFS = [
|
||||
Workflow.Config(
|
||||
name="PR",
|
||||
event=Workflow.Event.PULL_REQUEST,
|
||||
jobs=[
|
||||
Job.Config(
|
||||
name="Hello World",
|
||||
runs_on=["foo"],
|
||||
command="bar",
|
||||
job_requirements=Job.Requirements(
|
||||
python_requirements_txt="./requirement.txt"
|
||||
),
|
||||
)
|
||||
],
|
||||
enable_cache=True,
|
||||
)
|
||||
]
|
||||
YamlGenerator().generate(workflow_config=WFS)
|
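One detail of the YAML generator above that is easy to misread is the two-stage string formatting: GitHub Actions expressions are escaped with quadrupled braces in the per-job template (formatted once) and with eight braces in the workflow skeleton (formatted twice), and JOBS is filled with one "{}" placeholder per job before the items are substituted in a second format pass. A minimal sketch of that mechanism with toy templates (not the real ones):

# toy version of the two str.format passes used above
job_template = "\n  {NAME}:\n    run: echo ${{{{ github.ref }}}}"       # 4 braces -> 2 after one format
skeleton = "name: {NAME}\nconcurrency: ${{{{{{{{ github.workflow }}}}}}}}\njobs:{JOBS}"  # 8 braces, formatted twice

job_items = [job_template.format(NAME="build"), job_template.format(NAME="test")]
stage_1 = skeleton.format(NAME="PR", JOBS="{}" * len(job_items))        # one "{}" slot per job
print(stage_1.format(*job_items))                                       # braces collapse to "${{ ... }}"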
@ -10,6 +10,15 @@ DataTypePtr IAggregateFunction::getStateType() const
    return std::make_shared<DataTypeAggregateFunction>(shared_from_this(), argument_types, parameters);
}

DataTypePtr IAggregateFunction::getNormalizedStateType() const
{
    DataTypes normalized_argument_types;
    normalized_argument_types.reserve(argument_types.size());
    for (const auto & arg : argument_types)
        normalized_argument_types.emplace_back(arg->getNormalizedType());
    return std::make_shared<DataTypeAggregateFunction>(shared_from_this(), normalized_argument_types, parameters);
}

String IAggregateFunction::getDescription() const
{
    String description;

@ -73,7 +73,7 @@ public:
    virtual DataTypePtr getStateType() const;

    /// Same as the above but normalize state types so that variants with the same binary representation will use the same type.
    virtual DataTypePtr getNormalizedStateType() const { return getStateType(); }
    virtual DataTypePtr getNormalizedStateType() const;

    /// Returns true if two aggregate functions have the same state representation in memory and the same serialization,
    /// so state of one aggregate function can be safely used with another.
@ -67,19 +67,6 @@ struct HashTableNoState
};


/// These functions can be overloaded for custom types.
namespace ZeroTraits
{

template <typename T>
bool check(const T x) { return x == T{}; }

template <typename T>
void set(T & x) { x = T{}; }

}


/** Numbers are compared bitwise.
  * Complex types are compared by operator== as usual (this is important if there are gaps).
  *
@ -87,18 +74,32 @@ void set(T & x) { x = T{}; }
  * Otherwise the invariants in hash table probing do not met when NaNs are present.
  */
template <typename T>
inline bool bitEquals(T && a, T && b)
inline bool bitEquals(T a, T b)
{
    using RealT = std::decay_t<T>;

    if constexpr (std::is_floating_point_v<RealT>)
        /// Note that memcmp with constant size is compiler builtin.
        return 0 == memcmp(&a, &b, sizeof(RealT)); /// NOLINT
    if constexpr (std::is_floating_point_v<T>)
        /// Note that memcmp with constant size is a compiler builtin.
        return 0 == memcmp(&a, &b, sizeof(T)); /// NOLINT
    else
        return a == b;
}


/// These functions can be overloaded for custom types.
namespace ZeroTraits
{

template <typename T>
bool check(const T x)
{
    return bitEquals(x, T{});
}

template <typename T>
void set(T & x) { x = T{}; }

}


/**
  * getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and
  * emplace() methods of HashTable. Must not be called for a null LookupResult.
@ -17,53 +17,53 @@ namespace ErrorCodes
|
||||
/** These settings represent fine tunes for internal details of Coordination storages
|
||||
* and should not be changed by the user without a reason.
|
||||
*/
|
||||
#define LIST_OF_COORDINATION_SETTINGS(M, ALIAS) \
|
||||
M(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \
|
||||
M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \
|
||||
M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
|
||||
M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
|
||||
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
|
||||
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
|
||||
M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \
|
||||
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
|
||||
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
|
||||
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
|
||||
M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
|
||||
M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
|
||||
M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \
|
||||
M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
|
||||
M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
|
||||
M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
|
||||
M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
|
||||
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
|
||||
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
|
||||
M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
|
||||
M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
|
||||
M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
|
||||
M(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \
|
||||
M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
|
||||
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
|
||||
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
|
||||
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
|
||||
M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
|
||||
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
|
||||
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
|
||||
M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
|
||||
M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
|
||||
M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
|
||||
M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \
|
||||
M(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \
|
||||
M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \
|
||||
M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \
|
||||
M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
|
||||
M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
|
||||
M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
|
||||
M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
|
||||
M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
|
||||
M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
|
||||
M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) \
|
||||
M(Bool, use_xid_64, false, "Enable 64-bit XID. It is disabled by default because of backward compatibility", 0)
|
||||
#define LIST_OF_COORDINATION_SETTINGS(DECLARE, ALIAS) \
|
||||
DECLARE(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \
|
||||
DECLARE(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \
|
||||
DECLARE(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \
|
||||
DECLARE(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \
|
||||
DECLARE(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
|
||||
DECLARE(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
|
||||
DECLARE(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
|
||||
DECLARE(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \
|
||||
DECLARE(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
|
||||
DECLARE(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
|
||||
DECLARE(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
|
||||
DECLARE(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \
|
||||
DECLARE(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \
|
||||
DECLARE(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \
|
||||
DECLARE(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \
|
||||
DECLARE(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
|
||||
DECLARE(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \
|
||||
DECLARE(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \
|
||||
DECLARE(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
|
||||
DECLARE(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
|
||||
DECLARE(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
|
||||
DECLARE(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
|
||||
DECLARE(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
|
||||
DECLARE(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \
|
||||
DECLARE(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
|
||||
DECLARE(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
|
||||
DECLARE(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
|
||||
DECLARE(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
|
||||
DECLARE(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \
|
||||
DECLARE(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
|
||||
DECLARE(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
|
||||
DECLARE(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \
|
||||
DECLARE(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \
|
||||
DECLARE(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \
|
||||
DECLARE(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \
|
||||
DECLARE(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \
|
||||
DECLARE(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) \
|
||||
DECLARE(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \
|
||||
DECLARE(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \
|
||||
DECLARE(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \
|
||||
DECLARE(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \
|
||||
DECLARE(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \
|
||||
DECLARE(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \
|
||||
DECLARE(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \
|
||||
DECLARE(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) \
|
||||
DECLARE(Bool, use_xid_64, false, "Enable 64-bit XID. It is disabled by default because of backward compatibility", 0)
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
|
||||
IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)
|
||||
|
@ -1,8 +1,8 @@
#pragma once

#include <Core/LogsLevel.h>
#include <libnuraft/nuraft.hxx>
#include <Common/logger_useful.h>
#include <Core/SettingsEnums.h>

namespace DB
{

@ -22,6 +22,9 @@ class ReadBuffer;
class WriteBuffer;

/** Template class to define collections of settings.
  * If you create a new setting, please also add it to ./utils/check-style/check-settings-style
  * for validation
  *
  * Example of usage:
  *
  * mysettings.h:
@ -49,10 +52,10 @@ class WriteBuffer;
  * #include <Core/BaseSettings.h>
  * #include <Core/BaseSettingsFwdMacrosImpl.h>
  *
  * #define APPLY_FOR_MYSETTINGS(M) \
  * M(UInt64, a, 100, "Description of a", 0) \
  * M(Float, f, 3.11, "Description of f", IMPORTANT) // IMPORTANT - means the setting can't be ignored by older versions) \
  * M(String, s, "default", "Description of s", 0)
  * #define APPLY_FOR_MYSETTINGS(DECLARE, ALIAS) \
  * DECLARE(UInt64, a, 100, "Description of a", 0) \
  * DECLARE(Float, f, 3.11, "Description of f", IMPORTANT) // IMPORTANT - means the setting can't be ignored by older versions) \
  * DECLARE(String, s, "default", "Description of s", 0)
  *
  * DECLARE_SETTINGS_TRAITS(MySettingsTraits, APPLY_FOR_MYSETTINGS)
  * IMPLEMENT_SETTINGS_TRAITS(MySettingsTraits, APPLY_FOR_MYSETTINGS)

@ -1,5 +1,5 @@
#include <Core/BaseSettings.h>
#include <Core/FormatFactorySettingsDeclaration.h>
#include <Core/FormatFactorySettings.h>
#include <Core/SettingsEnums.h>

namespace DB

File diff suppressed because it is too large
File diff suppressed because it is too large
@ -26,170 +26,170 @@ extern const Metric BackgroundMessageBrokerSchedulePoolSize;
|
||||
namespace DB
|
||||
{
|
||||
|
||||
#define LIST_OF_SERVER_SETTINGS(M, ALIAS) \
|
||||
M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
|
||||
M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
|
||||
M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
|
||||
M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
|
||||
M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
|
||||
M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
|
||||
M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
|
||||
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
|
||||
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
|
||||
M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
|
||||
M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
|
||||
M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \
|
||||
M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
|
||||
M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
|
||||
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
|
||||
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
|
||||
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
|
||||
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
|
||||
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
|
||||
M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
|
||||
M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
|
||||
M(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||
M(String, default_database, "default", "Default database name.", 0) \
|
||||
M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
||||
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
|
||||
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
|
||||
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
|
||||
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
|
||||
M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
|
||||
M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
|
||||
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
|
||||
M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
|
||||
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
|
||||
M(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \
|
||||
M(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \
|
||||
M(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \
|
||||
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
||||
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
|
||||
M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
|
||||
M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
|
||||
#define LIST_OF_SERVER_SETTINGS(DECLARE, ALIAS) \
|
||||
DECLARE(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
|
||||
DECLARE(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
|
||||
DECLARE(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
|
||||
DECLARE(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
|
||||
DECLARE(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
|
||||
DECLARE(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \
|
||||
DECLARE(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
|
||||
DECLARE(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
|
||||
DECLARE(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
|
||||
DECLARE(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
|
||||
DECLARE(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
|
||||
DECLARE(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \
|
||||
DECLARE(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
|
||||
DECLARE(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
|
||||
DECLARE(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
|
||||
DECLARE(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
|
||||
DECLARE(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
|
||||
DECLARE(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
|
||||
DECLARE(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \
|
||||
DECLARE(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \
|
||||
DECLARE(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
DECLARE(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
DECLARE(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \
|
||||
DECLARE(String, default_database, "default", "Default database name.", 0) \
|
||||
DECLARE(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \
|
||||
DECLARE(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
|
||||
DECLARE(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
|
||||
DECLARE(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
|
||||
DECLARE(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
|
||||
DECLARE(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
|
||||
DECLARE(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
|
||||
DECLARE(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \
|
||||
DECLARE(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \
|
||||
DECLARE(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
|
||||
DECLARE(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \
|
||||
DECLARE(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \
|
||||
DECLARE(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \
|
||||
DECLARE(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
|
||||
DECLARE(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \
|
||||
DECLARE(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \
|
||||
DECLARE(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \
|
||||
\
|
||||
/* Database Catalog */ \
|
||||
M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
|
||||
M(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \
|
||||
M(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \
|
||||
M(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. Zero means 'never'.", 0) \
|
||||
M(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \
|
||||
M(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \
|
||||
DECLARE(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \
|
||||
DECLARE(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \
|
||||
DECLARE(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \
|
||||
DECLARE(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. Zero means 'never'.", 0) \
|
||||
DECLARE(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \
|
||||
DECLARE(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \
|
||||
\
|
||||
\
|
||||
M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \
|
||||
\
|
||||
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
|
||||
M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
|
||||
M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
|
||||
M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
|
||||
M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
|
||||
M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
|
||||
M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
|
||||
M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
|
||||
M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
|
||||
M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
|
||||
M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
|
||||
M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
|
||||
M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
|
||||
M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \
|
||||
M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \
|
||||
M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
|
||||
M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \
|
||||
M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \
|
||||
M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
|
||||
M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \
|
||||
M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \
|
||||
DECLARE(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
|
||||
DECLARE(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
|
||||
DECLARE(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
|
||||
DECLARE(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \
|
||||
DECLARE(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
|
||||
DECLARE(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
|
||||
DECLARE(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
|
||||
DECLARE(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
|
||||
DECLARE(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
|
||||
DECLARE(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
|
||||
DECLARE(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \
|
||||
DECLARE(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \
|
||||
DECLARE(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \
|
||||
DECLARE(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \
|
||||
DECLARE(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \
|
||||
DECLARE(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \
|
||||
DECLARE(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \
|
||||
DECLARE(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \
|
||||
DECLARE(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
|
||||
DECLARE(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \
|
||||
DECLARE(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \
|
||||
\
|
||||
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
|
||||
M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \
|
||||
M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
|
||||
M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
|
||||
M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \
|
||||
M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \
|
||||
DECLARE(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
|
||||
DECLARE(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \
|
||||
DECLARE(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
|
||||
DECLARE(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \
|
||||
DECLARE(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \
|
||||
DECLARE(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \
|
||||
\
|
||||
M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
|
||||
M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
|
||||
M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \
|
||||
M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \
|
||||
M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \
|
||||
M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
|
||||
M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
|
||||
DECLARE(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \
|
||||
DECLARE(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \
|
||||
DECLARE(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
DECLARE(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
DECLARE(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
DECLARE(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
DECLARE(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \
|
||||
DECLARE(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \
|
||||
DECLARE(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \
|
||||
DECLARE(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \
|
||||
DECLARE(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \
|
||||
DECLARE(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \
|
||||
\
|
||||
M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
|
||||
M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
|
||||
M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
|
||||
M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
|
||||
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
|
||||
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
|
||||
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
|
||||
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
|
||||
M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
|
||||
M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
|
||||
M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
|
||||
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
|
||||
M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
|
||||
M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \
|
||||
M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
|
||||
M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
|
||||
M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \
|
||||
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
|
||||
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
|
||||
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
|
||||
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
|
||||
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
|
||||
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
|
||||
M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
|
||||
M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \
|
||||
M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \
|
||||
M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
|
||||
M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
|
||||
M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
|
||||
M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \
|
||||
M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \
|
||||
M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \
|
||||
M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
|
||||
M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \
|
||||
M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
|
||||
M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
|
||||
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
|
||||
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
|
||||
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
|
||||
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
|
||||
M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
|
||||
M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
|
||||
M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \
|
||||
M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \
|
||||
M(UInt64, parts_kill_delay_period, 30, "Period to completely remove parts for SharedMergeTree. Only available in ClickHouse Cloud", 0) \
|
||||
M(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \
|
||||
M(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \
|
||||
M(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \
|
||||
M(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \
|
||||
DECLARE(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \
|
||||
DECLARE(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \
|
||||
DECLARE(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ", 0) \
|
||||
DECLARE(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \
|
||||
DECLARE(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \
|
||||
DECLARE(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \
|
||||
DECLARE(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \
|
||||
DECLARE(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
|
||||
DECLARE(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
|
||||
DECLARE(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
|
||||
DECLARE(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
|
||||
DECLARE(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
|
||||
DECLARE(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
|
||||
DECLARE(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
|
||||
DECLARE(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
|
||||
DECLARE(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \
|
||||
DECLARE(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
|
||||
DECLARE(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
|
||||
DECLARE(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \
|
||||
DECLARE(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
|
||||
DECLARE(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
|
||||
DECLARE(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
|
||||
DECLARE(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
|
||||
DECLARE(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
|
||||
DECLARE(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \
|
||||
DECLARE(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \
|
||||
DECLARE(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \
|
||||
DECLARE(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \
|
||||
DECLARE(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \
|
||||
DECLARE(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \
|
||||
DECLARE(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \
|
||||
DECLARE(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \
|
||||
DECLARE(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \
|
||||
DECLARE(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \
|
||||
DECLARE(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \
|
||||
DECLARE(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \
|
||||
DECLARE(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \
|
||||
DECLARE(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
DECLARE(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
DECLARE(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \
|
||||
DECLARE(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
DECLARE(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
|
||||
DECLARE(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
|
||||
DECLARE(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
|
||||
DECLARE(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
|
||||
DECLARE(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
|
||||
DECLARE(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \
|
||||
DECLARE(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \
|
||||
DECLARE(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \
|
||||
DECLARE(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \
|
||||
DECLARE(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \
|
||||
DECLARE(UInt64, parts_kill_delay_period, 30, "Period to completely remove parts for SharedMergeTree. Only available in ClickHouse Cloud", 0) \
|
||||
DECLARE(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \
|
||||
DECLARE(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \
|
||||
DECLARE(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \
|
||||
DECLARE(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \

/// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below
File diff suppressed because it is too large
@ -77,13 +77,16 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"restore_replace_external_dictionary_source_to_null", false, false, "New setting."},
{"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
{"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"merge_tree_min_read_task_size", 8, 8, "New setting"},
{"merge_tree_min_rows_for_concurrent_read_for_remote_filesystem", (20 * 8192), 0, "Setting is deprecated"},
{"merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem", (24 * 10 * 1024 * 1024), 0, "Setting is deprecated"},
{"implicit_select", false, false, "A new setting."},
{"output_format_native_write_json_as_string", false, false, "Add new setting to allow write JSON column as single String column in Native format"},
{"output_format_binary_write_json_as_string", false, false, "Add new setting to write values of JSON type as JSON string in RowBinary output format"},
{"input_format_binary_read_json_as_string", false, false, "Add new setting to read values of JSON type as JSON string in RowBinary input format"},
{"min_free_disk_bytes_to_perform_insert", 0, 0, "New setting."},
{"min_free_disk_ratio_to_perform_insert", 0.0, 0.0, "New setting."},
{"enable_named_columns_in_function_tuple", false, false, "Force disable the setting since it breaks queries"},
{"enable_named_columns_in_function_tuple", false, true, "Re-enable the setting since all known bugs are fixed"},
{"cloud_mode_database_engine", 1, 1, "A setting for ClickHouse Cloud"},
{"allow_experimental_shared_set_join", 1, 1, "A setting for ClickHouse Cloud"},
{"read_through_distributed_cache", 0, 0, "A setting for ClickHouse Cloud"},
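For orientation (an inference from the rows above, not text in the diff): each history entry pairs a setting name with its default in the previous release, its default in this release, and a human-readable reason. A sketch with illustrative types:

// Hypothetical illustration; SettingChangeRow and its field names are not the real SettingsChangesHistory types.
#include <string_view>

struct SettingChangeRow
{
    std::string_view name;         // setting name
    std::string_view old_default;  // value in the previous version
    std::string_view new_default;  // value starting with this version
    std::string_view reason;       // why it changed
};

constexpr SettingChangeRow example{
    "enable_named_columns_in_function_tuple", "false", "true",
    "Re-enable the setting since all known bugs are fixed"};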
@ -1,20 +1,21 @@
#pragma once

#include <Access/Common/SQLSecurityDefs.h>
#include <Core/Joins.h>
#include <Core/LoadBalancing.h>
#include <Core/LogsLevel.h>
#include <Core/MergeSelectorAlgorithm.h>
#include <Core/ParallelReplicasMode.h>
#include <Core/QueryLogElementType.h>
#include <Core/SchemaInferenceMode.h>
#include <Core/SettingsFields.h>
#include <Core/ShortCircuitFunctionEvaluation.h>
#include <Core/ParallelReplicasMode.h>
#include <Core/StreamingHandleErrorMode.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadSettings.h>
#include <Access/Common/SQLSecurityDefs.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <QueryPipeline/SizeLimits.h>
#include <Common/ShellCommandSettings.h>
#include <Core/MergeSelectorAlgorithm.h>

namespace DB
@ -263,14 +264,6 @@ enum class DistributedDDLOutputMode : uint8_t

DECLARE_SETTING_ENUM(DistributedDDLOutputMode)

enum class StreamingHandleErrorMode : uint8_t
{
DEFAULT = 0, // Ignore errors with threshold.
STREAM, // Put errors to stream in the virtual column named ``_error.
/*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likely system.kafka_errors. This is not implemented now. */
/*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. */
};

DECLARE_SETTING_ENUM(StreamingHandleErrorMode)

DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation)
16
src/Core/StreamingHandleErrorMode.h
Normal file
@ -0,0 +1,16 @@
#pragma once

#include <cstdint>

namespace DB
{

enum class StreamingHandleErrorMode : uint8_t
{
DEFAULT = 0, // Ignore errors with threshold.
STREAM, // Put errors to stream in the virtual column named ``_error.
/*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likely system.kafka_errors. This is not implemented now. */
/*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. */
};

}
@ -47,8 +47,8 @@ public:

Field getDefault() const override;

DataTypePtr getNormalizedType() const override { return std::make_shared<DataTypeArray>(nested->getNormalizedType()); }
bool equals(const IDataType & rhs) const override;

bool isParametric() const override { return true; }
bool haveSubtypes() const override { return true; }
bool cannotBeStoredInTables() const override { return nested->cannotBeStoredInTables(); }
@ -43,7 +43,10 @@ public:
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return true; }
bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); }

DataTypePtr getNormalizedType() const override
{
return std::make_shared<DataTypeMap>(key_type->getNormalizedType(), value_type->getNormalizedType());
}
const DataTypePtr & getKeyType() const { return key_type; }
const DataTypePtr & getValueType() const { return value_type; }
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
@ -133,6 +133,14 @@ std::string DataTypeTuple::doGetPrettyName(size_t indent) const
return s.str();
}

DataTypePtr DataTypeTuple::getNormalizedType() const
{
DataTypes normalized_elems;
normalized_elems.reserve(elems.size());
for (const auto & elem : elems)
normalized_elems.emplace_back(elem->getNormalizedType());
return std::make_shared<DataTypeTuple>(normalized_elems);
}

static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
{
@ -61,6 +61,7 @@ public:
MutableSerializationInfoPtr createSerializationInfo(const SerializationInfoSettings & settings) const override;
SerializationInfoPtr getSerializationInfo(const IColumn & column) const override;

DataTypePtr getNormalizedType() const override;
const DataTypePtr & getElement(size_t i) const { return elems[i]; }
const DataTypes & getElements() const { return elems; }
const Strings & getElementNames() const { return names; }
@ -88,6 +88,15 @@ public:

DataTypePtr getPtr() const { return shared_from_this(); }

/// Returns the normalized form of the current type, currently handling the
/// conversion of named tuples to unnamed tuples.
///
/// This is useful for converting aggregate states into a normalized form with
/// normalized argument types. E.g, `AggregateFunction(uniq, Tuple(a int, b int))`
/// should be convertible to `AggregateFunction(uniq, Tuple(int, int))`, as both
/// have same memory layouts for state representation and the same serialization.
virtual DataTypePtr getNormalizedType() const { return shared_from_this(); }

/// Name of data type family (example: FixedString, Array).
virtual const char * getFamilyName() const = 0;
@ -7,13 +7,13 @@
namespace DB
{
#define LIST_OF_DATABASE_REPLICATED_SETTINGS(M, ALIAS) \
M(Float, max_broken_tables_ratio, 1, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
M(UInt64, max_replication_lag_to_enqueue, 50, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
M(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \
M(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost recovering it from snapshot (0 means infinite)", 0) \
#define LIST_OF_DATABASE_REPLICATED_SETTINGS(DECLARE, ALIAS) \
DECLARE(Float, max_broken_tables_ratio, 1, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
DECLARE(UInt64, max_replication_lag_to_enqueue, 50, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
DECLARE(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
DECLARE(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
DECLARE(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \
DECLARE(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost recovering it from snapshot (0 means infinite)", 0) \
DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
IMPLEMENT_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
@ -4,6 +4,7 @@
|
||||
|
||||
# include <Core/Settings.h>
|
||||
# include <Databases/MySQL/DatabaseMaterializedMySQL.h>
|
||||
# include <Databases/MySQL/MaterializedMySQLSettings.h>
|
||||
# include <Common/parseAddress.h>
|
||||
# include <Common/parseRemoteDescription.h>
|
||||
|
||||
@ -18,6 +19,7 @@
|
||||
# include <Storages/StorageMySQL.h>
|
||||
# include <Storages/StorageMaterializedMySQL.h>
|
||||
# include <Storages/NamedCollectionsHelpers.h>
|
||||
# include <Storages/MySQL/MySQLSettings.h>
|
||||
# include <Common/setThreadName.h>
|
||||
# include <Common/PoolId.h>
|
||||
# include <filesystem>
|
||||
@ -31,6 +33,15 @@ namespace Setting
|
||||
extern const SettingsUInt64 glob_expansion_max_elements;
|
||||
}
|
||||
|
||||
namespace MaterializedMySQLSetting
|
||||
{
|
||||
extern const MaterializedMySQLSettingsBool allows_query_when_mysql_lost;
|
||||
extern const MaterializedMySQLSettingsBool allow_startup_database_without_connection_to_mysql;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_bytes_in_binlog_dispatcher_buffer;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_flush_milliseconds_in_binlog_dispatcher;
|
||||
extern const MaterializedMySQLSettingsBool use_binlog_client;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
@ -53,11 +64,13 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL(
|
||||
{
|
||||
}
|
||||
|
||||
DatabaseMaterializedMySQL::~DatabaseMaterializedMySQL() = default;
|
||||
|
||||
void DatabaseMaterializedMySQL::rethrowExceptionIfNeeded() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!settings->allows_query_when_mysql_lost && exception)
|
||||
if (!(*settings)[MaterializedMySQLSetting::allows_query_when_mysql_lost] && exception)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -89,7 +102,7 @@ LoadTaskPtr DatabaseMaterializedMySQL::startupDatabaseAsync(AsyncLoader & async_
|
||||
[this, mode] (AsyncLoader &, const LoadJobPtr &)
|
||||
{
|
||||
LOG_TRACE(log, "Starting MaterializeMySQL database");
|
||||
if (!settings->allow_startup_database_without_connection_to_mysql
|
||||
if (!(*settings)[MaterializedMySQLSetting::allow_startup_database_without_connection_to_mysql]
|
||||
&& mode < LoadingStrictnessLevel::FORCE_ATTACH)
|
||||
materialize_thread.assertMySQLAvailable();
|
||||
|
||||
@ -266,11 +279,11 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
|
||||
if (engine_define->settings)
|
||||
materialize_mode_settings->loadFromQuery(*engine_define);
|
||||
|
||||
if (materialize_mode_settings->use_binlog_client)
|
||||
if ((*materialize_mode_settings)[MaterializedMySQLSetting::use_binlog_client])
|
||||
binlog_client = DB::MySQLReplication::BinlogClientFactory::instance().getClient(
|
||||
configuration.host, configuration.port, configuration.username, configuration.password,
|
||||
materialize_mode_settings->max_bytes_in_binlog_dispatcher_buffer,
|
||||
materialize_mode_settings->max_flush_milliseconds_in_binlog_dispatcher);
|
||||
(*materialize_mode_settings)[MaterializedMySQLSetting::max_bytes_in_binlog_dispatcher_buffer],
|
||||
(*materialize_mode_settings)[MaterializedMySQLSetting::max_flush_milliseconds_in_binlog_dispatcher]);
|
||||
|
||||
if (args.uuid == UUIDHelpers::Nil)
|
||||
{
|
||||
|
@ -10,13 +10,14 @@
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Databases/DatabaseAtomic.h>
|
||||
#include <Databases/MySQL/MySQLBinlogClient.h>
|
||||
#include <Databases/MySQL/MaterializedMySQLSettings.h>
|
||||
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct MaterializedMySQLSettings;
|
||||
|
||||
/** Real-time pull table structure and data from remote MySQL
|
||||
*
|
||||
* All table structure and data will be written to the local file system
|
||||
@ -35,6 +36,8 @@ public:
|
||||
const MySQLReplication::BinlogClientPtr & binlog_client_,
|
||||
std::unique_ptr<MaterializedMySQLSettings> settings_);
|
||||
|
||||
~DatabaseMaterializedMySQL() override;
|
||||
|
||||
void rethrowExceptionIfNeeded() const;
|
||||
|
||||
void setException(const std::exception_ptr & exception);
|
||||
|
@ -46,6 +46,11 @@ namespace Setting
|
||||
extern const SettingsUInt64 max_parser_depth;
|
||||
}
|
||||
|
||||
namespace MySQLSetting
|
||||
{
|
||||
extern const MySQLSettingsMySQLDataTypesSupport mysql_datatypes_support_level;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
@ -329,7 +334,7 @@ DatabaseMySQL::fetchTablesColumnsList(const std::vector<String> & tables_name, C
|
||||
database_name_in_mysql,
|
||||
tables_name,
|
||||
settings,
|
||||
mysql_settings->mysql_datatypes_support_level);
|
||||
(*mysql_settings)[MySQLSetting::mysql_datatypes_support_level]);
|
||||
}
|
||||
|
||||
void DatabaseMySQL::shutdown()
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/MySQL/MySQLSettings.h>
|
||||
#include <Databases/DatabasesCommon.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
@ -26,7 +25,7 @@ namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
struct MySQLSettings;
|
||||
enum class MySQLDataTypesSupport : uint8_t;
|
||||
|
||||
/** Real-time access to table list and table structure from remote MySQL
|
||||
|
@ -1,7 +1,8 @@
|
||||
#include <Core/BaseSettings.h>
|
||||
#include <Core/BaseSettingsFwdMacrosImpl.h>
|
||||
#include <Databases/MySQL/MaterializedMySQLSettings.h>
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -11,15 +12,65 @@ namespace ErrorCodes
|
||||
extern const int UNKNOWN_SETTING;
|
||||
}
|
||||
|
||||
#define LIST_OF_MATERIALIZE_MODE_SETTINGS(DECLARE, ALIAS) \
|
||||
DECLARE(UInt64, max_rows_in_buffer, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
DECLARE(UInt64, max_bytes_in_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
DECLARE(UInt64, max_rows_in_buffers, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
DECLARE(UInt64, max_bytes_in_buffers, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
DECLARE(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). when this time is exceeded, the data will be materialized", 0) \
|
||||
DECLARE(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \
|
||||
DECLARE(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \
|
||||
DECLARE(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \
|
||||
DECLARE(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \
|
||||
DECLARE(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \
|
||||
DECLARE(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \
|
||||
DECLARE(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \
|
||||
DECLARE(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \
|
||||
DECLARE(Bool, allow_startup_database_without_connection_to_mysql, false, "Allow to create and attach database without available connection to MySQL.", 0) \
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS)
|
||||
IMPLEMENT_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS)
|
||||
|
||||
struct MaterializedMySQLSettingsImpl : public BaseSettings<MaterializedMySQLSettingsTraits>
|
||||
{
|
||||
};
|
||||
|
||||
#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \
|
||||
MaterializedMySQLSettings##TYPE NAME = &MaterializedMySQLSettingsImpl ::NAME;
|
||||
|
||||
namespace MaterializedMySQLSetting
|
||||
{
|
||||
LIST_OF_MATERIALIZE_MODE_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS)
|
||||
}
|
||||
|
||||
#undef INITIALIZE_SETTING_EXTERN
|
||||
|
||||
MaterializedMySQLSettings::MaterializedMySQLSettings() : impl(std::make_unique<MaterializedMySQLSettingsImpl>())
|
||||
{
|
||||
}
|
||||
|
||||
MaterializedMySQLSettings::MaterializedMySQLSettings(const MaterializedMySQLSettings & settings)
|
||||
: impl(std::make_unique<MaterializedMySQLSettingsImpl>(*settings.impl))
|
||||
{
|
||||
}
|
||||
|
||||
MaterializedMySQLSettings::MaterializedMySQLSettings(MaterializedMySQLSettings && settings) noexcept
|
||||
: impl(std::make_unique<MaterializedMySQLSettingsImpl>(std::move(*settings.impl)))
|
||||
{
|
||||
}
|
||||
|
||||
MaterializedMySQLSettings::~MaterializedMySQLSettings() = default;
|
||||
|
||||
MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR)
|
||||
|
||||
|
||||
void MaterializedMySQLSettings::loadFromQuery(ASTStorage & storage_def)
|
||||
{
|
||||
if (storage_def.settings)
|
||||
{
|
||||
try
|
||||
{
|
||||
applyChanges(storage_def.settings->changes);
|
||||
impl->applyChanges(storage_def.settings->changes);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
|
@ -1,38 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <Core/BaseSettings.h>
|
||||
#include <Core/BaseSettingsFwdMacros.h>
|
||||
#include <Core/SettingsFields.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTStorage;
|
||||
struct MaterializedMySQLSettingsImpl;
|
||||
|
||||
#define LIST_OF_MATERIALIZE_MODE_SETTINGS(M, ALIAS) \
|
||||
M(UInt64, max_rows_in_buffer, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
M(UInt64, max_bytes_in_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
M(UInt64, max_rows_in_buffers, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
M(UInt64, max_bytes_in_buffers, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \
|
||||
M(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). when this time is exceeded, the data will be materialized", 0) \
|
||||
M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \
|
||||
M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \
|
||||
M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \
|
||||
M(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \
|
||||
M(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \
|
||||
M(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \
|
||||
M(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \
|
||||
M(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \
|
||||
M(Bool, allow_startup_database_without_connection_to_mysql, false, "Allow to create and attach database without available connection to MySQL.", 0) \
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS)
|
||||
/// List of available types supported in MaterializedMySQLSettings object
|
||||
#define MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \
|
||||
M(CLASS_NAME, Bool) \
|
||||
M(CLASS_NAME, Int64) \
|
||||
M(CLASS_NAME, UInt64) \
|
||||
M(CLASS_NAME, String)
|
||||
|
||||
MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, DECLARE_SETTING_TRAIT)
|
||||
|
||||
/** Settings for the MaterializedMySQL database engine.
|
||||
* Could be loaded from a CREATE DATABASE query (SETTINGS clause).
|
||||
*/
|
||||
struct MaterializedMySQLSettings : public BaseSettings<MaterializedMySQLSettingsTraits>
|
||||
struct MaterializedMySQLSettings
|
||||
{
|
||||
MaterializedMySQLSettings();
|
||||
MaterializedMySQLSettings(const MaterializedMySQLSettings & settings);
|
||||
MaterializedMySQLSettings(MaterializedMySQLSettings && settings) noexcept;
|
||||
~MaterializedMySQLSettings();
|
||||
|
||||
MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR)
|
||||
|
||||
void loadFromQuery(ASTStorage & storage_def);
|
||||
|
||||
private:
|
||||
std::unique_ptr<MaterializedMySQLSettingsImpl> impl;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#if USE_MYSQL
|
||||
|
||||
#include <Databases/MySQL/MaterializedMySQLSettings.h>
|
||||
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
|
||||
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
|
||||
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
|
||||
@ -43,6 +44,19 @@ namespace Setting
|
||||
extern const SettingsBool insert_allow_materialized_columns;
|
||||
}
|
||||
|
||||
namespace MaterializedMySQLSetting
|
||||
{
|
||||
extern const MaterializedMySQLSettingsString materialized_mysql_tables_list;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_bytes_in_binlog_queue;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_bytes_in_buffer;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_bytes_in_buffers;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_flush_data_time;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_milliseconds_to_wait_in_binlog_queue;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_rows_in_buffer;
|
||||
extern const MaterializedMySQLSettingsUInt64 max_rows_in_buffers;
|
||||
extern const MaterializedMySQLSettingsInt64 max_wait_time_when_mysql_unavailable;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SYNTAX_ERROR;
|
||||
@ -270,10 +284,10 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread(
|
||||
{
|
||||
query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") ";
|
||||
|
||||
if (!settings->materialized_mysql_tables_list.value.empty())
|
||||
if (!(*settings)[MaterializedMySQLSetting::materialized_mysql_tables_list].value.empty())
|
||||
{
|
||||
Names tables_list;
|
||||
boost::split(tables_list, settings->materialized_mysql_tables_list.value, [](char c){ return c == ','; });
|
||||
boost::split(tables_list, (*settings)[MaterializedMySQLSetting::materialized_mysql_tables_list].value, [](char c){ return c == ','; });
|
||||
for (String & table_name: tables_list)
|
||||
{
|
||||
boost::trim(table_name);
|
||||
@ -305,7 +319,7 @@ void MaterializedMySQLSyncThread::synchronization()
|
||||
}
|
||||
|
||||
/// TODO: add gc task for `sign = -1`(use alter table delete, execute by interval. need final state)
|
||||
UInt64 max_flush_time = settings->max_flush_data_time;
|
||||
UInt64 max_flush_time = (*settings)[MaterializedMySQLSetting::max_flush_data_time];
|
||||
|
||||
try
|
||||
{
|
||||
@ -324,7 +338,7 @@ void MaterializedMySQLSyncThread::synchronization()
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
if (settings->max_wait_time_when_mysql_unavailable < 0)
|
||||
if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0)
|
||||
throw;
|
||||
bool binlog_was_purged = e.code() == ER_MASTER_FATAL_ERROR_READING_BINLOG ||
|
||||
e.code() == ER_MASTER_HAS_PURGED_REQUIRED_GTIDS;
|
||||
@ -335,12 +349,12 @@ void MaterializedMySQLSyncThread::synchronization()
|
||||
LOG_INFO(log, "Lost connection to MySQL");
|
||||
need_reconnect = true;
|
||||
setSynchronizationThreadException(std::current_exception());
|
||||
sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable);
|
||||
sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]);
|
||||
continue;
|
||||
}
|
||||
if (watch.elapsedMilliseconds() > max_flush_time || buffers.checkThresholds(
|
||||
settings->max_rows_in_buffer, settings->max_bytes_in_buffer,
|
||||
settings->max_rows_in_buffers, settings->max_bytes_in_buffers)
|
||||
(*settings)[MaterializedMySQLSetting::max_rows_in_buffer], (*settings)[MaterializedMySQLSetting::max_bytes_in_buffer],
|
||||
(*settings)[MaterializedMySQLSetting::max_rows_in_buffers], (*settings)[MaterializedMySQLSetting::max_bytes_in_buffers])
|
||||
)
|
||||
{
|
||||
watch.restart();
|
||||
@ -550,9 +564,9 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
||||
|
||||
if (connection.isNull())
|
||||
{
|
||||
if (settings->max_wait_time_when_mysql_unavailable < 0)
|
||||
if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0)
|
||||
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unable to connect to MySQL");
|
||||
sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable);
|
||||
sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -595,8 +609,8 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
||||
binlog = binlog_client->createBinlog(metadata.executed_gtid_set,
|
||||
database_name,
|
||||
{mysql_database_name},
|
||||
settings->max_bytes_in_binlog_queue,
|
||||
settings->max_milliseconds_to_wait_in_binlog_queue);
|
||||
(*settings)[MaterializedMySQLSetting::max_bytes_in_binlog_queue],
|
||||
(*settings)[MaterializedMySQLSetting::max_milliseconds_to_wait_in_binlog_queue]);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -611,7 +625,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
|
||||
if (settings->max_wait_time_when_mysql_unavailable < 0)
|
||||
if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0)
|
||||
throw;
|
||||
|
||||
if (!shouldReconnectOnException(std::current_exception()))
|
||||
@ -619,7 +633,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
|
||||
|
||||
setSynchronizationThreadException(std::current_exception());
|
||||
/// Avoid busy loop when MySQL is not available.
|
||||
sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable);
|
||||
sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,6 @@
|
||||
# include <DataTypes/DataTypesNumber.h>
|
||||
# include <Databases/DatabaseOrdinary.h>
|
||||
# include <Databases/IDatabase.h>
|
||||
# include <Databases/MySQL/MaterializedMySQLSettings.h>
|
||||
# include <Databases/MySQL/MySQLBinlogClient.h>
|
||||
# include <Parsers/ASTCreateQuery.h>
|
||||
# include <QueryPipeline/BlockIO.h>
|
||||
@ -21,6 +20,7 @@
|
||||
namespace DB
|
||||
{
|
||||
struct MaterializeMetadata;
|
||||
struct MaterializedMySQLSettings;
|
||||
|
||||
/** MySQL table structure and data synchronization thread
|
||||
*
|
||||
|
@ -36,6 +36,12 @@ namespace Setting
|
||||
extern const SettingsUInt64 glob_expansion_max_elements;
|
||||
}
|
||||
|
||||
namespace MySQLSetting
|
||||
{
|
||||
extern const MySQLSettingsUInt64 connect_timeout;
|
||||
extern const MySQLSettingsUInt64 read_write_timeout;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static const size_t default_num_tries_on_connection_loss = 3;
|
||||
|
||||
@ -82,8 +88,9 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
if (named_collection)
|
||||
{
|
||||
auto allowed_arguments{dictionary_allowed_keys};
|
||||
for (const auto & setting : mysql_settings.all())
|
||||
allowed_arguments.insert(setting.getName());
|
||||
auto setting_names = mysql_settings.getAllRegisteredNames();
|
||||
for (const auto & name : setting_names)
|
||||
allowed_arguments.insert(name);
|
||||
validateNamedCollection<ValidateKeysMultiset<ExternalDatabaseEqualKeysSet>>(*named_collection, {}, allowed_arguments);
|
||||
|
||||
StorageMySQL::Configuration::Addresses addresses;
|
||||
@ -115,17 +122,12 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
|
||||
});
|
||||
|
||||
const auto & settings = global_context->getSettingsRef();
|
||||
if (!mysql_settings.isChanged("connect_timeout"))
|
||||
mysql_settings.connect_timeout = settings[Setting::external_storage_connect_timeout_sec];
|
||||
if (!mysql_settings.isChanged("read_write_timeout"))
|
||||
mysql_settings.read_write_timeout = settings[Setting::external_storage_rw_timeout_sec];
|
||||
if (!mysql_settings[MySQLSetting::connect_timeout].changed)
|
||||
mysql_settings[MySQLSetting::connect_timeout] = settings[Setting::external_storage_connect_timeout_sec];
|
||||
if (!mysql_settings[MySQLSetting::read_write_timeout].changed)
|
||||
mysql_settings[MySQLSetting::read_write_timeout] = settings[Setting::external_storage_rw_timeout_sec];
|
||||
|
||||
for (const auto & setting : mysql_settings.all())
|
||||
{
|
||||
const auto & setting_name = setting.getName();
|
||||
if (named_collection->has(setting_name))
|
||||
mysql_settings.set(setting_name, named_collection->get<String>(setting_name));
|
||||
}
|
||||
mysql_settings.loadFromNamedCollection(*named_collection);
|
||||
|
||||
pool = std::make_shared<mysqlxx::PoolWithFailover>(
|
||||
createMySQLPoolWithFailover(
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Common/tryGetFileNameByFileDescriptor.h>
|
||||
#include <Core/FormatFactorySettingsDeclaration.h>
|
||||
#include <Core/FormatFactorySettings.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
|
@ -87,8 +87,8 @@ void KeyMetadata::assertAccess(const UserID & user_id_) const
|
||||
if (!checkAccess(user_id_))
|
||||
{
|
||||
throw Exception(ErrorCodes::FILECACHE_ACCESS_DENIED,
|
||||
"Metadata for key {} belongs to user {}, but user {} requested it",
|
||||
key.toString(), user.user_id, user_id_);
|
||||
"Metadata for key {} belongs to another user",
|
||||
key.toString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -66,6 +66,11 @@ namespace Setting
|
||||
extern const SettingsBool use_hedged_requests;
|
||||
}
|
||||
|
||||
namespace DistributedSetting
|
||||
{
|
||||
extern const DistributedSettingsBool skip_unavailable_shards;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
|
||||
@ -155,7 +160,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster,
|
||||
|
||||
if (!settings[Setting::skip_unavailable_shards].changed && distributed_settings)
|
||||
{
|
||||
new_settings[Setting::skip_unavailable_shards] = distributed_settings->skip_unavailable_shards.value;
|
||||
new_settings[Setting::skip_unavailable_shards] = (*distributed_settings)[DistributedSetting::skip_unavailable_shards].value;
|
||||
new_settings[Setting::skip_unavailable_shards].changed = true;
|
||||
}
|
||||
|
||||
|
@ -1124,15 +1124,15 @@ Strings Context::getWarnings() const
|
||||
SharedLockGuard lock(shared->mutex);
|
||||
common_warnings = shared->warnings;
|
||||
if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast<Int64>(shared->max_table_num_to_warn))
|
||||
common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn));
|
||||
common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}.", shared->max_table_num_to_warn));
|
||||
if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast<Int64>(shared->max_view_num_to_warn))
|
||||
common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn));
|
||||
common_warnings.emplace_back(fmt::format("The number of attached views is more than {}.", shared->max_view_num_to_warn));
|
||||
if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast<Int64>(shared->max_dictionary_num_to_warn))
|
||||
common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn));
|
||||
common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}.", shared->max_dictionary_num_to_warn));
|
||||
if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast<Int64>(shared->max_database_num_to_warn))
|
||||
common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn));
|
||||
common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}.", shared->max_database_num_to_warn));
|
||||
if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast<Int64>(shared->max_part_num_to_warn))
|
||||
common_warnings.emplace_back(fmt::format("The number of active parts is more than {}", shared->max_part_num_to_warn));
|
||||
common_warnings.emplace_back(fmt::format("The number of active parts is more than {}.", shared->max_part_num_to_warn));
|
||||
}
|
||||
/// Make setting's name ordered
|
||||
auto obsolete_settings = settings->getChangedAndObsoleteNames();
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Databases/DatabaseMemory.h>
|
||||
#include <Databases/DatabaseOnDisk.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <Storages/MemorySettings.h>
|
||||
#include <Storages/StorageMemory.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
@ -141,7 +142,7 @@ TemporaryTableHolder::TemporaryTableHolder(
|
||||
context_,
|
||||
[&](const StorageID & table_id)
|
||||
{
|
||||
auto storage = std::make_shared<StorageMemory>(table_id, ColumnsDescription{columns}, ConstraintsDescription{constraints}, String{});
|
||||
auto storage = std::make_shared<StorageMemory>(table_id, ColumnsDescription{columns}, ConstraintsDescription{constraints}, String{}, MemorySettings{});
|
||||
|
||||
if (create_for_global_subquery)
|
||||
storage->delayReadForGlobalSubqueries();
|
||||
|
@ -1708,7 +1708,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
executeLimitBy(query_plan);
|
||||
}
|
||||
|
||||
if (query.limitLength() && !query.limitBy())
|
||||
/// WITH TIES simply not supported properly for preliminary steps, so let's disable it.
|
||||
if (query.limitLength() && !query.limitBy() && !query.limit_with_ties)
|
||||
executePreLimit(query_plan, true);
|
||||
}
|
||||
};
|
||||
@ -2083,7 +2084,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
|
||||
/// If we have 'WITH TIES', we need execute limit before projection,
|
||||
/// because in that case columns from 'ORDER BY' are used.
|
||||
if (query.limit_with_ties && apply_offset)
|
||||
if (query.limit_with_ties && apply_limit && apply_offset)
|
||||
{
|
||||
executeLimit(query_plan);
|
||||
}
|
||||
|
@ -68,6 +68,8 @@ ColumnsDescription PartLogElement::getColumnsDescription()
|
||||
{"RemovePart", static_cast<Int8>(REMOVE_PART)},
|
||||
{"MutatePart", static_cast<Int8>(MUTATE_PART)},
|
||||
{"MovePart", static_cast<Int8>(MOVE_PART)},
|
||||
{"MergePartsStart", static_cast<Int8>(MERGE_PARTS_START)},
|
||||
{"MutatePartStart", static_cast<Int8>(MUTATE_PART_START)},
|
||||
}
|
||||
);
|
||||
|
||||
@ -102,10 +104,12 @@ ColumnsDescription PartLogElement::getColumnsDescription()
|
||||
"Type of the event that occurred with the data part. "
|
||||
"Can have one of the following values: "
|
||||
"NewPart — Inserting of a new data part, "
|
||||
"MergeParts — Merging of data parts, "
|
||||
"MergePartsStart — Merging of data parts has started, "
|
||||
"MergeParts — Merging of data parts has finished, "
|
||||
"DownloadPart — Downloading a data part, "
|
||||
"RemovePart — Removing or detaching a data part using DETACH PARTITION, "
|
||||
"MutatePart — Mutating of a data part, "
|
||||
"RemovePart — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition)."
|
||||
"MutatePartStart — Mutating of a data part has started, "
|
||||
"MutatePart — Mutating of a data part has finished, "
|
||||
"MovePart — Moving the data part from the one disk to another one."},
|
||||
{"merge_reason", std::move(merge_reason_datatype),
|
||||
"The reason for the event with type MERGE_PARTS. Can have one of the following values: "
|
||||
|
@ -26,6 +26,8 @@ struct PartLogElement
|
||||
REMOVE_PART = 4,
|
||||
MUTATE_PART = 5,
|
||||
MOVE_PART = 6,
|
||||
MERGE_PARTS_START = 7,
|
||||
MUTATE_PART_START = 8,
|
||||
};
|
||||
|
||||
/// Copy of MergeAlgorithm since values are written to disk.
|
||||
@ -135,7 +137,7 @@ public:
|
||||
|
||||
static PartLogEntries createPartLogEntries(const MutableDataPartsVector & parts, UInt64 elapsed_ns, ProfileCountersSnapshotPtr profile_counters = {});
|
||||
|
||||
/// Add a record about creation of new part.
|
||||
/// Add a record about creation of a new part.
|
||||
static bool addNewPart(ContextPtr context, const PartLogEntry & part,
|
||||
const ExecutionStatus & execution_status = {});
|
||||
|
||||
|
@ -1,12 +1,14 @@
|
||||
#include "Loggers.h"
|
||||
|
||||
#include "OwnFormattingChannel.h"
|
||||
#include "OwnPatternFormatter.h"
|
||||
#include "OwnSplitChannel.h"
|
||||
#include <Loggers/OwnFilteringChannel.h>
|
||||
#include <Loggers/OwnFormattingChannel.h>
|
||||
#include <Loggers/OwnPatternFormatter.h>
|
||||
#include <Loggers/OwnSplitChannel.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <Poco/AutoPtr.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Net/RemoteSyslogChannel.h>
|
||||
@ -222,6 +224,18 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
|
||||
logger.close();
|
||||
|
||||
logger.setChannel(split);
|
||||
|
||||
const std::string global_pos_pattern = config.getRawString("logger.message_regexp", "");
|
||||
const std::string global_neg_pattern = config.getRawString("logger.message_regexp_negative", "");
|
||||
|
||||
Poco::AutoPtr<OwnPatternFormatter> pf;
|
||||
if (config.getString("logger.formatting.type", "") == "json")
|
||||
pf = new OwnJSONPatternFormatter(config);
|
||||
else
|
||||
pf = new OwnPatternFormatter;
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger, global_pos_pattern, global_neg_pattern, pf, Poco::Logger::ROOT);
|
||||
|
||||
logger.setLevel(max_log_level);
|
||||
|
||||
// Global logging level and channel (it can be overridden for specific loggers).
|
||||
@ -236,6 +250,8 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
|
||||
{
|
||||
logger.get(name).setLevel(max_log_level);
|
||||
logger.get(name).setChannel(split);
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger.get(name), global_pos_pattern, global_neg_pattern, pf, name);
|
||||
}
|
||||
|
||||
// Explicitly specified log levels for specific loggers.
|
||||
@ -262,6 +278,26 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
|
||||
}
|
||||
}
|
||||
}
|
||||
// Explicitly specified regexp patterns for filtering specific loggers
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys loggers_regexp;
|
||||
config.keys("logger.message_regexps", loggers_regexp);
|
||||
|
||||
if (!loggers_regexp.empty())
|
||||
{
|
||||
for (const auto & key : loggers_regexp)
|
||||
{
|
||||
if (key == "logger" || key.starts_with("logger["))
|
||||
{
|
||||
const std::string name = config.getString("logger.message_regexps." + key + ".name");
|
||||
const std::string pos_pattern = config.getRawString("logger.message_regexps." + key + ".message_regexp", global_pos_pattern);
|
||||
const std::string neg_pattern = config.getRawString("logger.message_regexps." + key + ".message_regexp_negative", global_neg_pattern);
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger.root().get(name), pos_pattern, neg_pattern, pf, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef WITHOUT_TEXT_LOG
|
||||
if (allowTextLog() && config.has("text_log"))
|
||||
{
|
||||
@ -347,16 +383,32 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log
|
||||
}
|
||||
split->setLevel("syslog", syslog_level);
|
||||
|
||||
const std::string global_pos_pattern = config.getRawString("logger.message_regexp", "");
|
||||
const std::string global_neg_pattern = config.getRawString("logger.message_regexp_negative", "");
|
||||
|
||||
Poco::AutoPtr<OwnPatternFormatter> pf;
|
||||
if (config.getString("logger.formatting.type", "") == "json")
|
||||
pf = new OwnJSONPatternFormatter(config);
|
||||
else
|
||||
pf = new OwnPatternFormatter;
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger, global_pos_pattern, global_neg_pattern, pf, Poco::Logger::ROOT);
|
||||
|
||||
// Global logging level (it can be overridden for specific loggers).
|
||||
logger.setLevel(max_log_level);
|
||||
|
||||
// Set level to all already created loggers
|
||||
std::vector<std::string> names;
|
||||
|
||||
logger.root().names(names);
|
||||
|
||||
// Set all to global in case logger.levels are not specified
|
||||
for (const auto & name : names)
|
||||
{
|
||||
logger.root().get(name).setLevel(max_log_level);
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger.root().get(name), global_pos_pattern, global_neg_pattern, pf, name);
|
||||
}
|
||||
|
||||
logger.root().setLevel(max_log_level);
|
||||
|
||||
// Explicitly specified log levels for specific loggers.
|
||||
@ -383,6 +435,27 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Explicitly specified regexp patterns for filtering specific loggers
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys loggers_regexp;
|
||||
config.keys("logger.message_regexps", loggers_regexp);
|
||||
|
||||
if (!loggers_regexp.empty())
|
||||
{
|
||||
for (const auto & key : loggers_regexp)
|
||||
{
|
||||
if (key == "logger" || key.starts_with("logger["))
|
||||
{
|
||||
const std::string name(config.getString("logger.message_regexps." + key + ".name"));
|
||||
const std::string pos_pattern(config.getRawString("logger.message_regexps." + key + ".message_regexp", global_pos_pattern));
|
||||
const std::string neg_pattern(config.getRawString("logger.message_regexps." + key + ".message_regexp_negative", global_neg_pattern));
|
||||
|
||||
DB::createOrUpdateFilterChannel(logger.root().get(name), pos_pattern, neg_pattern, pf, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// NOLINTEND(readability-static-accessed-through-instance)
|
||||
|
96
src/Loggers/OwnFilteringChannel.cpp
Normal file
@ -0,0 +1,96 @@
|
||||
#include <shared_mutex>
|
||||
#include <Loggers/OwnFilteringChannel.h>
|
||||
#include <Poco/RegularExpression.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void OwnFilteringChannel::log(const Poco::Message & msg)
|
||||
{
|
||||
if (regexpFilteredOut(msg))
|
||||
return;
|
||||
|
||||
pChannel->log(msg);
|
||||
}
|
||||
|
||||
bool OwnFilteringChannel::regexpFilteredOut(const Poco::Message & msg)
|
||||
{
|
||||
std::string formatted_text;
|
||||
auto [pos_pattern, neg_pattern] = safeGetPatterns();
|
||||
|
||||
// Skip checks if both patterns are empty
|
||||
if (!pos_pattern.empty() || !neg_pattern.empty())
|
||||
{
|
||||
// Apply formatting to the text
|
||||
if (pFormatter)
|
||||
{
|
||||
pFormatter->formatExtended(ExtendedLogMessage::getFrom(msg), formatted_text);
|
||||
}
|
||||
else
|
||||
{
|
||||
formatted_text = msg.getText();
|
||||
}
|
||||
|
||||
// Check for patterns in formatted text
|
||||
Poco::RegularExpression positive_regexp(pos_pattern);
|
||||
if (!pos_pattern.empty() && !positive_regexp.match(formatted_text))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
Poco::RegularExpression negative_regexp(neg_pattern);
|
||||
if (!neg_pattern.empty() && negative_regexp.match(formatted_text))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void OwnFilteringChannel::setRegexpPatterns(const std::string & new_pos_pattern, const std::string & new_neg_pattern)
|
||||
{
|
||||
auto [old_pos_pattern, old_neg_pattern] = safeGetPatterns();
|
||||
if (old_pos_pattern != new_pos_pattern || old_neg_pattern != new_neg_pattern)
|
||||
{
|
||||
std::unique_lock<std::shared_mutex> write_lock(pattern_mutex);
|
||||
positive_pattern = new_pos_pattern;
|
||||
negative_pattern = new_neg_pattern;
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::string, std::string> OwnFilteringChannel::safeGetPatterns()
|
||||
{
|
||||
std::shared_lock<std::shared_mutex> read_lock(pattern_mutex);
|
||||
return std::make_pair(positive_pattern, negative_pattern);
|
||||
}
|
||||
|
||||
void createOrUpdateFilterChannel(Poco::Logger & logger, const std::string & pos_pattern, const std::string & neg_pattern, Poco::AutoPtr<OwnPatternFormatter> pf, const std::string & name)
|
||||
{
|
||||
Poco::AutoPtr<Poco::Channel> src_channel(logger.getChannel(), true /*shared*/);
|
||||
Poco::AutoPtr<DB::OwnFilteringChannel> filter_channel(dynamic_cast<DB::OwnFilteringChannel*>(src_channel.get()), true);
|
||||
|
||||
// If this logger doesn't have it's own unique filter channel
|
||||
if (!filter_channel)
|
||||
{
|
||||
// Skip if regexp feature has never been used yet
|
||||
if (pos_pattern.empty() && neg_pattern.empty())
|
||||
return;
|
||||
|
||||
Poco::AutoPtr<DB::OwnFilteringChannel> new_filter_channel = new DB::OwnFilteringChannel(src_channel, pf, pos_pattern, neg_pattern, name);
|
||||
logger.setChannel(new_filter_channel);
|
||||
}
|
||||
// If logger has filter channel, but not it's own unique one (e.g copied from another by default), create copy
|
||||
else if (filter_channel->getAssignedLoggerName() != name)
|
||||
{
|
||||
Poco::AutoPtr<DB::OwnFilteringChannel> new_filter_channel = new DB::OwnFilteringChannel(filter_channel, pos_pattern, neg_pattern, name);
|
||||
logger.setChannel(new_filter_channel);
|
||||
}
|
||||
else
|
||||
{
|
||||
filter_channel->setRegexpPatterns(pos_pattern, neg_pattern);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
84
src/Loggers/OwnFilteringChannel.h
Normal file
@ -0,0 +1,84 @@
|
||||
#pragma once
|
||||
#include <Poco/AutoPtr.h>
|
||||
#include <Poco/Channel.h>
|
||||
#include <Poco/Message.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Loggers/OwnPatternFormatter.h>
|
||||
#include <shared_mutex>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
// Filters the logs based on regular expressions. Should be processed after formatting channel to read entire formatted text
|
||||
class OwnFilteringChannel : public Poco::Channel
|
||||
{
|
||||
public:
|
||||
explicit OwnFilteringChannel(Poco::AutoPtr<Poco::Channel> pChannel_, Poco::AutoPtr<OwnPatternFormatter> pf,
|
||||
const std::string & positive_pattern_, const std::string & negative_pattern_, const std::string & name_)
|
||||
: logger_name(name_), positive_pattern(positive_pattern_), negative_pattern(negative_pattern_), pChannel(pChannel_), pFormatter(pf)
|
||||
{
|
||||
}
|
||||
|
||||
explicit OwnFilteringChannel(Poco::AutoPtr<OwnFilteringChannel> other, const std::string & positive_pattern_, const std::string & negative_pattern_, const std::string & name_)
|
||||
: logger_name(name_), positive_pattern(positive_pattern_), negative_pattern(negative_pattern_), pChannel(other->pChannel), pFormatter(other->pFormatter)
|
||||
{
|
||||
}
|
||||
|
||||
// Only log if pass both positive and negative regexp checks.
|
||||
// Checks the regexps on the formatted text (without color), but then passes the raw text
|
||||
// to the split channel to handle formatting for individual channels (e.g apply color)
|
||||
void log(const Poco::Message & msg) override;
|
||||
|
||||
// Sets the regex patterns to use for filtering. Specifying an empty string pattern "" indicates no filtering
|
||||
void setRegexpPatterns(const std::string & new_pos_pattern, const std::string & new_neg_pattern);
|
||||
|
||||
std::string getAssignedLoggerName() const
|
||||
{
|
||||
return logger_name;
|
||||
}
|
||||
|
||||
void open() override
|
||||
{
|
||||
if (pChannel)
|
||||
pChannel->open();
|
||||
}
|
||||
|
||||
void close() override
|
||||
{
|
||||
if (pChannel)
|
||||
pChannel->close();
|
||||
}
|
||||
|
||||
void setProperty(const std::string & name, const std::string & value) override
|
||||
{
|
||||
if (pChannel)
|
||||
pChannel->setProperty(name, value);
|
||||
}
|
||||
|
||||
std::string getProperty(const std::string & name) const override
|
||||
{
|
||||
if (pChannel)
|
||||
return pChannel->getProperty(name);
|
||||
return "";
|
||||
}
|
||||
|
||||
private:
|
||||
bool regexpFilteredOut(const Poco::Message & msg);
|
||||
|
||||
// Create copy safely, so we don't have to worry about race conditions from reading and writing at the same time
|
||||
std::pair<std::string, std::string> safeGetPatterns();
|
||||
|
||||
const std::string logger_name;
|
||||
std::string positive_pattern;
|
||||
std::string negative_pattern;
|
||||
Poco::AutoPtr<Poco::Channel> pChannel;
|
||||
Poco::AutoPtr<OwnPatternFormatter> pFormatter;
|
||||
std::shared_mutex pattern_mutex;
|
||||
};
|
||||
|
||||
// Creates filter channel only if needed or updates if it already exists
|
||||
void createOrUpdateFilterChannel(Poco::Logger & logger, const std::string & pos_pattern, const std::string & neg_pattern, Poco::AutoPtr<OwnPatternFormatter> pf, const std::string & name = "");
|
||||
|
||||
}
|
@ -858,9 +858,8 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
|
||||
query_plan.addStep(std::move(filling_step));
|
||||
}
|
||||
|
||||
void addLimitByStep(QueryPlan & query_plan,
|
||||
const LimitByAnalysisResult & limit_by_analysis_result,
|
||||
const QueryNode & query_node)
|
||||
void addLimitByStep(
|
||||
QueryPlan & query_plan, const LimitByAnalysisResult & limit_by_analysis_result, const QueryNode & query_node, bool do_not_skip_offset)
|
||||
{
|
||||
/// Constness of LIMIT BY limit is validated during query analysis stage
|
||||
UInt64 limit_by_limit = query_node.getLimitByLimit()->as<ConstantNode &>().getValue().safeGet<UInt64>();
|
||||
@ -872,6 +871,15 @@ void addLimitByStep(QueryPlan & query_plan,
|
||||
limit_by_offset = query_node.getLimitByOffset()->as<ConstantNode &>().getValue().safeGet<UInt64>();
|
||||
}
|
||||
|
||||
if (do_not_skip_offset)
|
||||
{
|
||||
if (limit_by_limit > std::numeric_limits<UInt64>::max() - limit_by_offset)
|
||||
return;
|
||||
|
||||
limit_by_limit += limit_by_offset;
|
||||
limit_by_offset = 0;
|
||||
}
|
||||
|
||||
auto limit_by_step = std::make_unique<LimitByStep>(query_plan.getCurrentHeader(),
|
||||
limit_by_limit,
|
||||
limit_by_offset,
|
||||
@ -985,10 +993,14 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan,
|
||||
{
|
||||
auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy();
|
||||
addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets);
|
||||
addLimitByStep(query_plan, limit_by_analysis_result, query_node);
|
||||
/// We don't apply LIMIT BY on remote nodes at all in the old infrastructure.
|
||||
/// https://github.com/ClickHouse/ClickHouse/blob/67c1e89d90ef576e62f8b1c68269742a3c6f9b1e/src/Interpreters/InterpreterSelectQuery.cpp#L1697-L1705
|
||||
/// Let's be optimistic and only don't skip offset (it will be skipped on the initiator).
|
||||
addLimitByStep(query_plan, limit_by_analysis_result, query_node, true /*do_not_skip_offset*/);
|
||||
}
|
||||
|
||||
if (query_node.hasLimit())
|
||||
/// WITH TIES simply not supported properly for preliminary steps, so let's disable it.
|
||||
if (query_node.hasLimit() && !query_node.hasLimitByOffset() && !query_node.isLimitWithTies())
|
||||
addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, true /*do_not_skip_offset*/);
|
||||
}
|
||||
|
||||
@ -1777,21 +1789,20 @@ void Planner::buildPlanForQueryNode()
|
||||
{
|
||||
auto & limit_by_analysis_result = expression_analysis_result.getLimitBy();
|
||||
addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets);
|
||||
addLimitByStep(query_plan, limit_by_analysis_result, query_node);
|
||||
addLimitByStep(query_plan, limit_by_analysis_result, query_node, false /*do_not_skip_offset*/);
|
||||
}
|
||||
|
||||
if (query_node.hasOrderBy())
|
||||
addWithFillStepIfNeeded(query_plan, query_analysis_result, planner_context, query_node);
|
||||
|
||||
bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
|
||||
|
||||
if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_offset)
|
||||
const bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation;
|
||||
const bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
|
||||
if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_limit && apply_offset)
|
||||
addLimitStep(query_plan, query_analysis_result, planner_context, query_node);
|
||||
|
||||
addExtremesStepIfNeeded(query_plan, planner_context);
|
||||
|
||||
bool limit_applied = applied_prelimit || (query_node.isLimitWithTies() && apply_offset);
|
||||
bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation;
|
||||
|
||||
/** Limit is no longer needed if there is prelimit.
|
||||
*
|
||||
|
@ -175,6 +175,7 @@ namespace Setting
|
||||
extern const SettingsBool use_skip_indexes;
|
||||
extern const SettingsBool use_skip_indexes_if_final;
|
||||
extern const SettingsBool use_uncompressed_cache;
|
||||
extern const SettingsUInt64 merge_tree_min_read_task_size;
|
||||
}
|
||||
|
||||
namespace MergeTreeSetting
|
||||
@ -446,20 +447,17 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit
|
||||
reader_settings,
|
||||
required_columns,
|
||||
pool_settings,
|
||||
block_size,
|
||||
context);
|
||||
|
||||
auto block_size_copy = block_size;
|
||||
block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read;
|
||||
|
||||
Pipes pipes;
|
||||
|
||||
for (size_t i = 0; i < pool_settings.threads; ++i)
|
||||
{
|
||||
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings);
|
||||
auto processor
|
||||
= std::make_unique<MergeTreeSelectProcessor>(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings);
|
||||
|
||||
auto source = std::make_shared<MergeTreeSource>(std::move(processor), data.getLogName());
|
||||
pipes.emplace_back(std::move(source));
|
||||
@ -526,6 +524,7 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
reader_settings,
|
||||
required_columns,
|
||||
pool_settings,
|
||||
block_size,
|
||||
context);
|
||||
}
|
||||
else
|
||||
@ -540,25 +539,19 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
reader_settings,
|
||||
required_columns,
|
||||
pool_settings,
|
||||
block_size,
|
||||
context);
|
||||
}
|
||||
|
||||
LOG_DEBUG(log, "Reading approx. {} rows with {} streams", total_rows, pool_settings.threads);
|
||||
|
||||
/// The reason why we change this setting is because MergeTreeReadPool takes the full task
|
||||
/// ignoring min_marks_to_read setting in case of remote disk (see MergeTreeReadPool::getTask).
|
||||
/// In this case, we won't limit the number of rows to read based on adaptive granularity settings.
|
||||
auto block_size_copy = block_size;
|
||||
block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read;
|
||||
|
||||
Pipes pipes;
|
||||
for (size_t i = 0; i < pool_settings.threads; ++i)
|
||||
{
|
||||
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings);
|
||||
auto processor
|
||||
= std::make_unique<MergeTreeSelectProcessor>(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings);
|
||||
|
||||
auto source = std::make_shared<MergeTreeSource>(std::move(processor), data.getLogName());
|
||||
|
||||
@ -627,6 +620,7 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
reader_settings,
|
||||
required_columns,
|
||||
pool_settings,
|
||||
block_size,
|
||||
context);
|
||||
}
|
||||
else
|
||||
@ -643,6 +637,7 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
reader_settings,
|
||||
required_columns,
|
||||
pool_settings,
|
||||
block_size,
|
||||
context);
|
||||
}
|
||||
|
||||
@ -676,9 +671,8 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
else
|
||||
algorithm = std::make_unique<MergeTreeInOrderSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), prewhere_info,
|
||||
actions_settings, block_size, reader_settings);
|
||||
auto processor
|
||||
= std::make_unique<MergeTreeSelectProcessor>(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings);
|
||||
|
||||
processor->addPartLevelToChunk(isQueryWithFinal());
|
||||
|
||||
@ -798,7 +792,7 @@ struct PartRangesReadInfo
|
||||
|
||||
min_marks_for_concurrent_read = MergeTreeDataSelectExecutor::minMarksForConcurrentRead(
|
||||
min_rows_for_concurrent_read, min_bytes_for_concurrent_read,
|
||||
data_settings[MergeTreeSetting::index_granularity], index_granularity_bytes, sum_marks);
|
||||
data_settings[MergeTreeSetting::index_granularity], index_granularity_bytes, settings[Setting::merge_tree_min_read_task_size], sum_marks);
|
||||
|
||||
use_uncompressed_cache = settings[Setting::use_uncompressed_cache];
|
||||
if (sum_marks > max_marks_to_use_cache)
|
||||
|
Some files were not shown because too many files have changed in this diff