diff --git a/.gitmodules b/.gitmodules index bd61c52a5e0..bbc8fc7d06c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -227,12 +227,6 @@ [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng -[submodule "contrib/qpl"] - path = contrib/qpl - url = https://github.com/intel/qpl -[submodule "contrib/idxd-config"] - path = contrib/idxd-config - url = https://github.com/intel/idxd-config [submodule "contrib/QAT-ZSTD-Plugin"] path = contrib/QAT-ZSTD-Plugin url = https://github.com/intel/QAT-ZSTD-Plugin diff --git a/base/base/StringRef.h b/base/base/StringRef.h index af3441c2a75..aa2bce71032 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -369,11 +369,15 @@ namespace PackedZeroTraits { template class PackedPairNoInit> inline bool check(const PackedPairNoInit p) - { return 0 == p.key.size; } + { + return 0 == p.key.size; + } template class PackedPairNoInit> inline void set(PackedPairNoInit & p) - { p.key.size = 0; } + { + p.key.size = 0; + } } diff --git a/base/poco/Foundation/include/Poco/Logger.h b/base/poco/Foundation/include/Poco/Logger.h index 74ddceea9dd..f7da3c08fa3 100644 --- a/base/poco/Foundation/include/Poco/Logger.h +++ b/base/poco/Foundation/include/Poco/Logger.h @@ -952,6 +952,8 @@ private: static std::pair add(Logger * pLogger); static std::optional find(const std::string & name); static Logger * findRawPtr(const std::string & name); + void unsafeSetChannel(Channel * pChannel); + Channel* unsafeGetChannel() const; Logger(); Logger(const Logger &); diff --git a/base/poco/Foundation/src/Logger.cpp b/base/poco/Foundation/src/Logger.cpp index 779af384b0b..55564a7a175 100644 --- a/base/poco/Foundation/src/Logger.cpp +++ b/base/poco/Foundation/src/Logger.cpp @@ -61,6 +61,13 @@ Logger::~Logger() void Logger::setChannel(Channel* pChannel) +{ + std::lock_guard lock(getLoggerMutex()); + unsafeSetChannel(pChannel); +} + + +void Logger::unsafeSetChannel(Channel* pChannel) { if (_pChannel) 
_pChannel->release(); _pChannel = pChannel; @@ -69,6 +76,14 @@ void Logger::setChannel(Channel* pChannel) Channel* Logger::getChannel() const +{ + std::lock_guard lock(getLoggerMutex()); + + return unsafeGetChannel(); +} + + +Channel* Logger::unsafeGetChannel() const { return _pChannel; } @@ -89,7 +104,7 @@ void Logger::setLevel(const std::string& level) void Logger::setProperty(const std::string& name, const std::string& value) { if (name == "channel") - setChannel(LoggingRegistry::defaultRegistry().channelForName(value)); + unsafeSetChannel(LoggingRegistry::defaultRegistry().channelForName(value)); else if (name == "level") setLevel(value); else @@ -160,7 +175,7 @@ void Logger::setChannel(const std::string& name, Channel* pChannel) if (len == 0 || (it.first.compare(0, len, name) == 0 && (it.first.length() == len || it.first[len] == '.'))) { - it.second.logger->setChannel(pChannel); + it.second.logger->unsafeSetChannel(pChannel); } } } @@ -393,7 +408,7 @@ std::pair Logger::unsafeGet(const std::string& else { Logger& par = parent(name); - logger = new Logger(name, par.getChannel(), par.getLevel()); + logger = new Logger(name, par.unsafeGetChannel(), par.getLevel()); } return add(logger); diff --git a/ci/README.md b/ci/README.md new file mode 100644 index 00000000000..192243d598b --- /dev/null +++ b/ci/README.md @@ -0,0 +1 @@ +Note: This directory is under active development for CI improvements and is not currently in use within the scope of the existing CI pipeline. diff --git a/ci/docker/fasttest/Dockerfile b/ci/docker/fasttest/Dockerfile new file mode 100644 index 00000000000..02595ad0d0a --- /dev/null +++ b/ci/docker/fasttest/Dockerfile @@ -0,0 +1,105 @@ +# docker build -t clickhouse/fasttest . 
+FROM ubuntu:22.04 + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list + +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18 + +RUN apt-get update \ + && apt-get install \ + apt-transport-https \ + apt-utils \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + wget \ + git \ + --yes --no-install-recommends --verbose-versions \ + && export LLVM_PUBKEY_HASH="bda960a8da687a275a2078d43c111d66b1c6a893a3275271beedf266c1ff4a0cdecb429c7a5cccf9f486ea7aa43fd27f" \ + && wget -nv -O /tmp/llvm-snapshot.gpg.key https://apt.llvm.org/llvm-snapshot.gpg.key \ + && echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \ + && apt-key add /tmp/llvm-snapshot.gpg.key \ + && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ + && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ + /etc/apt/sources.list \ + && apt-get update \ + && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* + +# moreutils - provides ts for FT +# expect, bzip2 - required by FT +# bsdmainutils - provides hexdump for FT + +RUN apt-get update \ + && apt-get install \ + clang-${LLVM_VERSION} \ + cmake \ + libclang-${LLVM_VERSION}-dev \ + libclang-rt-${LLVM_VERSION}-dev \ + lld-${LLVM_VERSION} \ + llvm-${LLVM_VERSION}-dev \ + lsof \ + ninja-build \ + python3 \ + python3-pip \ + zstd \ + moreutils \ + expect \ + bsdmainutils \ + pv \ + jq \ + bzip2 \ + --yes --no-install-recommends \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* + +COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb +# Give suid to gdb to grant it attach permissions +RUN chmod u+s /opt/gdb/bin/gdb +ENV PATH="/opt/gdb/bin:${PATH}" + +# This symlink is required by gcc to find the lld linker 
+RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake + +# LLVM changes paths for compiler-rt libraries. For some reason clang-18.1.8 cannot pick up libraries from the default install path. +# It's a very dirty workaround; it would be better to build the compiler and LLVM ourselves and use that. Details: https://github.com/llvm/llvm-project/issues/95792 +RUN test ! -d /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu || ln -s /usr/lib/llvm-18/lib/clang/18/lib/x86_64-pc-linux-gnu /usr/lib/llvm-18/lib/clang/18/lib/x86_64-unknown-linux-gnu + +ARG TARGETARCH +ARG SCCACHE_VERSION=v0.7.7 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 +RUN arch=${TARGETARCH} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && curl -Ls "https://github.com/mozilla/sccache/releases/download/$SCCACHE_VERSION/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl.tar.gz" | \ + tar xz -C /tmp \ + && mv "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl/sccache" /usr/bin \ + && rm "/tmp/sccache-$SCCACHE_VERSION-$rarch-unknown-linux-musl" -r + +COPY requirements.txt / +RUN pip3 install --no-cache-dir -r /requirements.txt + +# chmod 777 to make the container user independent +RUN mkdir -p /var/lib/clickhouse \ + && chmod 777 /var/lib/clickhouse + +ENV TZ=Europe/Amsterdam +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN groupadd --system --gid 1000 clickhouse \ + && useradd --system --gid 1000 --uid 1000 -m clickhouse \ + && mkdir -p /.cache/sccache && chmod 777 /.cache/sccache + +ENV 
PYTHONPATH="/wd" +ENV PYTHONUNBUFFERED=1 diff --git a/ci/docker/fasttest/requirements.txt b/ci/docker/fasttest/requirements.txt new file mode 100644 index 00000000000..a1488ee33f0 --- /dev/null +++ b/ci/docker/fasttest/requirements.txt @@ -0,0 +1,6 @@ +Jinja2==3.1.3 +numpy==1.26.4 +requests==2.32.3 +pandas==1.5.3 +scipy==1.12.0 +#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl diff --git a/ci_v2/docker/style-test/Dockerfile b/ci/docker/style-test/Dockerfile similarity index 100% rename from ci_v2/docker/style-test/Dockerfile rename to ci/docker/style-test/Dockerfile diff --git a/ci/docker/style-test/requirements.txt b/ci/docker/style-test/requirements.txt new file mode 100644 index 00000000000..ab48f245fd2 --- /dev/null +++ b/ci/docker/style-test/requirements.txt @@ -0,0 +1,5 @@ +requests==2.32.3 +yamllint==1.26.3 +codespell==2.2.1 +#use praktika from CH repo +#https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl diff --git a/ci_v2/jobs/check_style.py b/ci/jobs/check_style.py similarity index 81% rename from ci_v2/jobs/check_style.py rename to ci/jobs/check_style.py index 4dd3864e865..1b1b0bf689b 100644 --- a/ci_v2/jobs/check_style.py +++ b/ci/jobs/check_style.py @@ -2,7 +2,6 @@ import math import multiprocessing import os import re -import sys from concurrent.futures import ProcessPoolExecutor from pathlib import Path @@ -51,25 +50,6 @@ def run_check_concurrent(check_name, check_function, files, nproc=NPROC): return result -def run_simple_check(check_name, check_function, **kwargs): - stop_watch = Utils.Stopwatch() - - error = check_function(**kwargs) - - result = Result( - name=check_name, - status=Result.Status.SUCCESS if not error else Result.Status.FAILED, - start_time=stop_watch.start_time, - duration=stop_watch.duration, - info=error, - ) - return result - - -def run_check(check_name, check_function, files): - return run_check_concurrent(check_name, check_function, files, nproc=1) - - def 
check_duplicate_includes(file_path): includes = [] with open(file_path, "r", encoding="utf-8", errors="ignore") as f: @@ -117,7 +97,7 @@ def check_xmllint(file_paths): def check_functional_test_cases(files): """ Queries with event_date should have yesterday() not today() - NOTE: it is not that accuate, but at least something. + NOTE: it is not that accurate, but at least something. """ patterns = [ @@ -345,66 +325,58 @@ if __name__ == "__main__": ) ) results.append( - run_check( - check_name="Check Tests Numbers", - check_function=check_gaps_in_tests_numbers, - files=functional_test_files, + Result.create_from_command_execution( + name="Check Tests Numbers", + command=check_gaps_in_tests_numbers, + command_args=[functional_test_files], ) ) results.append( - run_simple_check( - check_name="Check Broken Symlinks", - check_function=check_broken_links, - path="./", - exclude_paths=["contrib/", "metadata/", "programs/server/data"], + Result.create_from_command_execution( + name="Check Broken Symlinks", + command=check_broken_links, + command_kwargs={ + "path": "./", + "exclude_paths": ["contrib/", "metadata/", "programs/server/data"], + }, ) ) results.append( - run_simple_check( - check_name="Check CPP code", - check_function=check_cpp_code, + Result.create_from_command_execution( + name="Check CPP code", + command=check_cpp_code, ) ) results.append( - run_simple_check( - check_name="Check Submodules", - check_function=check_repo_submodules, + Result.create_from_command_execution( + name="Check Submodules", + command=check_repo_submodules, ) ) results.append( - run_check( - check_name="Check File Names", - check_function=check_file_names, - files=all_files, + Result.create_from_command_execution( + name="Check File Names", + command=check_file_names, + command_args=[all_files], ) ) results.append( - run_simple_check( - check_name="Check Many Different Things", - check_function=check_other, + Result.create_from_command_execution( + name="Check Many Different Things", + 
command=check_other, ) ) results.append( - run_simple_check( - check_name="Check Codespell", - check_function=check_codespell, + Result.create_from_command_execution( + name="Check Codespell", + command=check_codespell, ) ) results.append( - run_simple_check( - check_name="Check Aspell", - check_function=check_aspell, + Result.create_from_command_execution( + name="Check Aspell", + command=check_aspell, ) ) - res = Result.create_from(results=results, stopwatch=stop_watch).dump() - - if not res.is_ok(): - print("Style check: failed") - for result in results: - if not result.is_ok(): - print("Failed check:") - print(" | ", result) - sys.exit(1) - else: - print("Style check: ok") + Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly() diff --git a/ci/jobs/fast_test.py b/ci/jobs/fast_test.py new file mode 100644 index 00000000000..b82c17aa42c --- /dev/null +++ b/ci/jobs/fast_test.py @@ -0,0 +1,329 @@ +import threading +from pathlib import Path + +from ci_v2.jobs.scripts.functional_tests_results import FTResultsProcessor +from praktika.environment import Environment +from praktika.result import Result +from praktika.settings import Settings +from praktika.utils import MetaClasses, Shell, Utils + + +class ClickHouseProc: + def __init__(self): + self.ch_config_dir = f"{Settings.TEMP_DIR}/etc/clickhouse-server" + self.pid_file = f"{self.ch_config_dir}/clickhouse-server.pid" + self.config_file = f"{self.ch_config_dir}/config.xml" + self.user_files_path = f"{self.ch_config_dir}/user_files" + self.test_output_file = f"{Settings.OUTPUT_DIR}/test_result.txt" + self.command = f"clickhouse-server --config-file {self.config_file} --pid-file {self.pid_file} -- --path {self.ch_config_dir} --user_files_path {self.user_files_path} --top_level_domains_path {self.ch_config_dir}/top_level_domains --keeper_server.storage_path {self.ch_config_dir}/coordination" + self.proc = None + self.pid = 0 + nproc = int(Utils.cpu_count() / 2) + self.fast_test_command = 
f"clickhouse-test --hung-check --fast-tests-only --no-random-settings --no-random-merge-tree-settings --no-long --testname --shard --zookeeper --check-zookeeper-session --order random --print-time --report-logs-stats --jobs {nproc} -- '' | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a \"{self.test_output_file}\"" + # TODO: store info in case of failure + self.info = "" + self.info_file = "" + + Utils.set_env("CLICKHOUSE_CONFIG_DIR", self.ch_config_dir) + Utils.set_env("CLICKHOUSE_CONFIG", self.config_file) + Utils.set_env("CLICKHOUSE_USER_FILES", self.user_files_path) + Utils.set_env("CLICKHOUSE_SCHEMA_FILES", f"{self.ch_config_dir}/format_schemas") + + def start(self): + print("Starting ClickHouse server") + Shell.check(f"rm {self.pid_file}") + + def run_clickhouse(): + self.proc = Shell.run_async( + self.command, verbose=True, suppress_output=True + ) + + thread = threading.Thread(target=run_clickhouse) + thread.daemon = True # Allow program to exit even if thread is still running + thread.start() + + # self.proc = Shell.run_async(self.command, verbose=True) + + started = False + try: + for _ in range(5): + pid = Shell.get_output(f"cat {self.pid_file}").strip() + if not pid: + Utils.sleep(1) + continue + started = True + print(f"Got pid from fs [{pid}]") + _ = int(pid) + break + except Exception: + pass + + if not started: + stdout = self.proc.stdout.read().strip() if self.proc.stdout else "" + stderr = self.proc.stderr.read().strip() if self.proc.stderr else "" + Utils.print_formatted_error("Failed to start ClickHouse", stdout, stderr) + return False + + print(f"ClickHouse server started successfully, pid [{pid}]") + return True + + def wait_ready(self): + res, out, err = 0, "", "" + attempts = 30 + delay = 2 + for attempt in range(attempts): + res, out, err = Shell.get_res_stdout_stderr( + 'clickhouse-client --query "select 1"', verbose=True + ) + if out.strip() == "1": + print("Server ready") + break + else: + print(f"Server not ready, wait") + Utils.sleep(delay) + 
else: + Utils.print_formatted_error( + f"Server not ready after [{attempts*delay}s]", out, err + ) + return False + return True + + def run_fast_test(self): + if Path(self.test_output_file).exists(): + Path(self.test_output_file).unlink() + exit_code = Shell.run(self.fast_test_command) + return exit_code == 0 + + def terminate(self): + print("Terminate ClickHouse process") + timeout = 10 + if self.proc: + Utils.terminate_process_group(self.proc.pid) + + self.proc.terminate() + try: + self.proc.wait(timeout=10) + print(f"Process {self.proc.pid} terminated gracefully.") + except Exception: + print( + f"Process {self.proc.pid} did not terminate in {timeout} seconds, killing it..." + ) + Utils.terminate_process_group(self.proc.pid, force=True) + self.proc.wait() # Wait for the process to be fully killed + print(f"Process {self.proc} was killed.") + + +def clone_submodules(): + submodules_to_update = [ + "contrib/sysroot", + "contrib/magic_enum", + "contrib/abseil-cpp", + "contrib/boost", + "contrib/zlib-ng", + "contrib/libxml2", + "contrib/libunwind", + "contrib/fmtlib", + "contrib/aklomp-base64", + "contrib/cctz", + "contrib/libcpuid", + "contrib/libdivide", + "contrib/double-conversion", + "contrib/llvm-project", + "contrib/lz4", + "contrib/zstd", + "contrib/fastops", + "contrib/rapidjson", + "contrib/re2", + "contrib/sparsehash-c11", + "contrib/croaring", + "contrib/miniselect", + "contrib/xz", + "contrib/dragonbox", + "contrib/fast_float", + "contrib/NuRaft", + "contrib/jemalloc", + "contrib/replxx", + "contrib/wyhash", + "contrib/c-ares", + "contrib/morton-nd", + "contrib/xxHash", + "contrib/expected", + "contrib/simdjson", + "contrib/liburing", + "contrib/libfiu", + "contrib/incbin", + "contrib/yaml-cpp", + ] + + res = Shell.check("git submodule sync", verbose=True, strict=True) + res = res and Shell.check("git submodule init", verbose=True, strict=True) + res = res and Shell.check( + command=f"xargs --max-procs={min([Utils.cpu_count(), 20])} --null 
--no-run-if-empty --max-args=1 git submodule update --depth 1 --single-branch", + stdin_str="\0".join(submodules_to_update) + "\0", + timeout=120, + retries=3, + verbose=True, + ) + res = res and Shell.check("git submodule foreach git reset --hard", verbose=True) + res = res and Shell.check("git submodule foreach git checkout @ -f", verbose=True) + res = res and Shell.check("git submodule foreach git clean -xfd", verbose=True) + return res + + +def update_path_ch_config(config_file_path=""): + print("Updating path in clickhouse config") + config_file_path = ( + config_file_path or f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.xml" + ) + ssl_config_file_path = ( + f"{Settings.TEMP_DIR}/etc/clickhouse-server/config.d/ssl_certs.xml" + ) + try: + with open(config_file_path, "r", encoding="utf-8") as file: + content = file.read() + + with open(ssl_config_file_path, "r", encoding="utf-8") as file: + ssl_config_content = file.read() + content = content.replace(">/var/", f">{Settings.TEMP_DIR}/var/") + content = content.replace(">/etc/", f">{Settings.TEMP_DIR}/etc/") + ssl_config_content = ssl_config_content.replace( + ">/etc/", f">{Settings.TEMP_DIR}/etc/" + ) + with open(config_file_path, "w", encoding="utf-8") as file: + file.write(content) + with open(ssl_config_file_path, "w", encoding="utf-8") as file: + file.write(ssl_config_content) + except Exception as e: + print(f"ERROR: failed to update config, exception: {e}") + return False + return True + + +class JobStages(metaclass=MetaClasses.WithIter): + CHECKOUT_SUBMODULES = "checkout" + CMAKE = "cmake" + BUILD = "build" + CONFIG = "config" + TEST = "test" + + +def main(): + stop_watch = Utils.Stopwatch() + + stages = list(JobStages) + stage = Environment.LOCAL_RUN_PARAM or JobStages.CHECKOUT_SUBMODULES + if stage: + assert stage in JobStages, f"--param must be one of [{list(JobStages)}]" + print(f"Job will start from stage [{stage}]") + while stage in stages: + stages.pop(0) + stages.insert(0, stage) + + 
current_directory = Utils.cwd() + build_dir = f"{Settings.TEMP_DIR}/build" + + Utils.add_to_PATH(f"{build_dir}/programs:{current_directory}/tests") + + res = True + results = [] + + if res and JobStages.CHECKOUT_SUBMODULES in stages: + Shell.check(f"rm -rf {build_dir} && mkdir -p {build_dir}") + results.append( + Result.create_from_command_execution( + name="Checkout Submodules for Minimal Build", + command=clone_submodules, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.CMAKE in stages: + results.append( + Result.create_from_command_execution( + name="Cmake configuration", + command=f"cmake {current_directory} -DCMAKE_CXX_COMPILER=clang++-18 -DCMAKE_C_COMPILER=clang-18 \ + -DCMAKE_TOOLCHAIN_FILE={current_directory}/cmake/linux/toolchain-x86_64-musl.cmake -DENABLE_LIBRARIES=0 \ + -DENABLE_TESTS=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 -DENABLE_NURAFT=1 -DENABLE_SIMDJSON=1 \ + -DENABLE_JEMALLOC=1 -DENABLE_LIBURING=1 -DENABLE_YAML_CPP=1 -DCOMPILER_CACHE=sccache", + workdir=build_dir, + with_log=True, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.BUILD in stages: + Shell.check("sccache --show-stats") + results.append( + Result.create_from_command_execution( + name="Build ClickHouse", + command="ninja clickhouse-bundle clickhouse-stripped", + workdir=build_dir, + with_log=True, + ) + ) + Shell.check("sccache --show-stats") + res = results[-1].is_ok() + + if res and JobStages.BUILD in stages: + commands = [ + f"mkdir -p {Settings.OUTPUT_DIR}/binaries", + f"cp ./programs/clickhouse {Settings.OUTPUT_DIR}/binaries/clickhouse", + f"zstd --threads=0 --force programs/clickhouse-stripped -o {Settings.OUTPUT_DIR}/binaries/clickhouse-stripped.zst", + "sccache --show-stats", + "clickhouse-client --version", + "clickhouse-test --help", + ] + results.append( + Result.create_from_command_execution( + name="Check and Compress binary", + command=commands, + workdir=build_dir, + with_log=True, + ) + ) + res = results[-1].is_ok() + + if res and JobStages.CONFIG 
in stages: + commands = [ + f"rm -rf {Settings.TEMP_DIR}/etc/ && mkdir -p {Settings.TEMP_DIR}/etc/clickhouse-client {Settings.TEMP_DIR}/etc/clickhouse-server", + f"cp {current_directory}/programs/server/config.xml {current_directory}/programs/server/users.xml {Settings.TEMP_DIR}/etc/clickhouse-server/", + f"{current_directory}/tests/config/install.sh {Settings.TEMP_DIR}/etc/clickhouse-server {Settings.TEMP_DIR}/etc/clickhouse-client", + # f"cp -a {current_directory}/programs/server/config.d/log_to_console.xml {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/", + f"rm -f {Settings.TEMP_DIR}/etc/clickhouse-server/config.d/secure_ports.xml", + update_path_ch_config, + ] + results.append( + Result.create_from_command_execution( + name="Install ClickHouse Config", + command=commands, + with_log=True, + ) + ) + res = results[-1].is_ok() + + CH = ClickHouseProc() + if res and JobStages.TEST in stages: + stop_watch_ = Utils.Stopwatch() + step_name = "Start ClickHouse Server" + print(step_name) + res = CH.start() + res = res and CH.wait_ready() + results.append( + Result.create_from(name=step_name, status=res, stopwatch=stop_watch_) + ) + + if res and JobStages.TEST in stages: + step_name = "Tests" + print(step_name) + res = res and CH.run_fast_test() + if res: + results.append(FTResultsProcessor(wd=Settings.OUTPUT_DIR).run()) + + CH.terminate() + + Result.create_from(results=results, stopwatch=stop_watch).finish_job_accordingly() + + +if __name__ == "__main__": + main() diff --git a/ci_v2/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt similarity index 100% rename from ci_v2/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt rename to ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt diff --git a/ci_v2/jobs/scripts/check_style/check_aspell.sh b/ci/jobs/scripts/check_style/check_aspell.sh similarity index 100% rename from ci_v2/jobs/scripts/check_style/check_aspell.sh rename to 
ci/jobs/scripts/check_style/check_aspell.sh diff --git a/ci_v2/jobs/scripts/check_style/check_cpp.sh b/ci/jobs/scripts/check_style/check_cpp.sh similarity index 86% rename from ci_v2/jobs/scripts/check_style/check_cpp.sh rename to ci/jobs/scripts/check_style/check_cpp.sh index 1611fac8c5e..7963bf982af 100755 --- a/ci_v2/jobs/scripts/check_style/check_cpp.sh +++ b/ci/jobs/scripts/check_style/check_cpp.sh @@ -14,7 +14,8 @@ LC_ALL="en_US.UTF-8" ROOT_PATH="." -EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' +EXCLUDE='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' +EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettingsDeclaration\.h' # From [1]: # But since array_to_string_internal() in array.c still loops over array @@ -31,7 +32,8 @@ function in_array() } find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | + grep -vP $EXCLUDE_DOCS | xargs grep $@ -P '((class|struct|namespace|enum|if|for|while|else|throw|switch).*|\)(\s*const)?(\s*override)?\s*)\{$|\s$|^ {1,3}[^\* ]\S|\t|^\s*(if|else if|if constexpr|else if constexpr|for|while|catch|switch)\(|\( [^\s\\]|\S \)' | # a curly brace not in a new line, but not for the case of C++11 init or agg. initialization | trailing whitespace | number of ws not a multiple of 4, but not in the case of comment continuation | missing whitespace after for/if/while... 
before opening brace | whitespaces inside braces grep -v -P '(//|:\s+\*|\$\(\()| \)"' @@ -39,12 +41,12 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/n # Tabs find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | - xargs grep $@ -F $'\t' + grep -vP $EXCLUDE | + xargs grep $@ -F $'\t' && echo '^ tabs are not allowed' # // namespace comments are unneeded find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' 2>/dev/null | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep $@ -P '}\s*//+\s*namespace\s*' # Broken symlinks @@ -52,26 +54,26 @@ find -L $ROOT_PATH -type l 2>/dev/null | grep -v contrib && echo "^ Broken symli # Duplicated or incorrect setting declarations SETTINGS_FILE=$(mktemp) -cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " " substr($1, 3, length($1) - 3) " SettingsDeclaration" }' > ${SETTINGS_FILE} -find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep "extern const Settings" -T | awk '{print substr($5, 0, length($5) -1) " " substr($4, 9) " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE} +ALL_DECLARATION_FILES=" + $ROOT_PATH/src/Core/Settings.cpp + $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp + $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h" -# Duplicate extern declarations for settings -awk '{if (seen[$0]++) print $3 " -> " $1 ;}' ${SETTINGS_FILE} | while read line; +cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " Settings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq > ${SETTINGS_FILE} +cat $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp | grep "M(" | awk '{print substr($2, 0, length($2) - 1) " MergeTreeSettings" substr($1, 3, length($1) - 3) " 
SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} + +# Check that if there are duplicated settings (declared in different objects) they all have the same type (it's simpler to validate style with that assert) +for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sed -e 's/MergeTreeSettings//g' -e 's/Settings//g' | sort | uniq | awk '{ print $1 }' | uniq -d); do - echo "Found duplicated setting declaration in: $line" + echo "# Found multiple definitions of setting ${setting} with different types: " + grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > " $0 }' done -# Incorrect declarations for settings -for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | sort | uniq | awk '{ print $1 }' | sort | uniq -d); -do - expected=$(grep "^$setting " ${SETTINGS_FILE} | grep SettingsDeclaration | awk '{ print $2 }') - grep "^$setting " ${SETTINGS_FILE} | grep -v " $expected" | awk '{ print $3 " found setting " $1 " with type " $2 }' | while read line; - do - echo "In $line but it should be $expected" - done -done +# We append all uses of extern found in implementation files to validate them in a single pass and avoid reading the same files over and over +find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -e "^\s*extern const Settings" -e "^\s**extern const MergeTreeSettings" -T | awk '{print substr($5, 0, length($5) -1) " " $4 " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE} -rm ${SETTINGS_FILE} +# Duplicated or incorrect setting declarations +bash $ROOT_PATH/utils/check-style/check-settings-style # Unused/Undefined/Duplicates ErrorCodes/ProfileEvents/CurrentMetrics declare -A EXTERN_TYPES @@ -91,12 +93,14 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Timer ProfileEvents::Type ProfileEvents::TypeEnum + ProfileEvents::ValueType ProfileEvents::dumpToMapColumn ProfileEvents::getProfileEvents ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME ProfileEvents::keeper_profile_events 
ProfileEvents::CountersIncrement + ProfileEvents::size CurrentMetrics::add CurrentMetrics::sub @@ -108,6 +112,7 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::values CurrentMetrics::Value CurrentMetrics::keeper_metrics + CurrentMetrics::size ErrorCodes::ErrorCode ErrorCodes::getName @@ -130,7 +135,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # and this matches with zkutil::CreateMode grep -v -e 'src/Common/ZooKeeper/Types.h' -e 'src/Coordination/KeeperConstants.cpp' } | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "extern const $type_of_extern $allowed_chars" } | while read file; do grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do if ! grep -q "$extern_type::$val" $file; then @@ -148,7 +153,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \ # awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do if ! 
grep -q "extern const $type_of_extern $val" $file; then @@ -161,7 +166,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Duplicates find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" + grep -vP $EXCLUDE | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" done @@ -169,32 +174,32 @@ done # Three or more consecutive empty lines find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | while read file; do awk '/^$/ { ++i; if (i > 2) { print "More than two consecutive empty lines in file '$file'" } } /./ { i = 0 }' $file; done # Check that every header file has #pragma once in first line find $ROOT_PATH/{src,programs,utils} -name '*.h' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done # Too many exclamation marks find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F '!!!' | grep -P '.' && echo "Too many exclamation marks (looks dirty, unconfident)." # Exclamation mark in a message find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F '!",' | grep -P '.' && echo "No need for an exclamation mark (looks dirty, unconfident)." # Trailing whitespaces find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -n -P ' $' | grep -n -P '.' && echo "^ Trailing whitespaces." 
# Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream" # Forbid std::cerr/std::cout in src (fine in programs/utils) @@ -204,6 +209,7 @@ std_cerr_cout_excludes=( _fuzzer # OK src/Common/ProgressIndication.cpp + src/Common/ProgressTable.cpp # only under #ifdef DBMS_HASH_MAP_DEBUG_RESIZES, that is used only in tests src/Common/HashTable/HashTable.h # SensitiveDataMasker::printStats() @@ -230,11 +236,10 @@ std_cerr_cout_excludes=( ) sources_with_std_cerr_cout=( $( find $ROOT_PATH/{src,base} -name '*.h' -or -name '*.cpp' | \ - grep -vP $EXCLUDE_DIRS | \ + grep -vP $EXCLUDE | \ grep -F -v $(printf -- "-e %s " "${std_cerr_cout_excludes[@]}") | \ xargs grep -F --with-filename -e std::cerr -e std::cout | cut -d: -f1 | sort -u ) ) - # Exclude comments for src in "${sources_with_std_cerr_cout[@]}"; do # suppress stderr, since it may contain warning for #pargma once in headers @@ -279,23 +284,23 @@ fi # Forbid std::filesystem::is_symlink and std::filesystem::read_symlink, because it's easy to use them incorrectly find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '::(is|read)_symlink' | grep -v "STYLE_CHECK_ALLOW_STD_FS_SYMLINK" && echo "Use DB::FS::isSymlink and DB::FS::readSymlink instead" # Forbid __builtin_unreachable(), because it's hard to debug when it becomes reachable find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '__builtin_unreachable' && echo "Use UNREACHABLE() from defines.h instead" # Forbid mt19937() and random_device() which are outdated and slow find 
$ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P '(std::mt19937|std::mersenne_twister_engine|std::random_device)' && echo "Use pcg64_fast (from pcg_random.h) and randomSeed (from Common/randomSeed.h) instead" # Require checking return value of close(), # since it can hide fd misuse and break other places. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -e ' close(.*fd' -e ' ::close(' | grep -v = && echo "Return value of close() should be checked" # A small typo can lead to debug code in release builds, see https://github.com/ClickHouse/ClickHouse/pull/47647 @@ -322,18 +327,15 @@ ls -1d $ROOT_PATH/contrib/*-cmake | xargs -I@ find @ -name 'CMakeLists.txt' -or # Wrong spelling of abbreviations, e.g. SQL is right, Sql is wrong. XMLHttpRequest is very wrong. find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -P 'Sql|Html|Xml|Cpu|Tcp|Udp|Http|Db|Json|Yaml' | grep -v -P 'RabbitMQ|Azure|Aws|aws|Avro|IO/S3' && echo "Abbreviations such as SQL, XML, HTTP, should be in all caps. For example, SQL is right, Sql is wrong. XMLHttpRequest is very wrong." find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | - grep -vP $EXCLUDE_DIRS | + grep -vP $EXCLUDE | xargs grep -F -i 'ErrorCodes::LOGICAL_ERROR, "Logical error:' && echo "If an exception has LOGICAL_ERROR code, there is no need to include the text 'Logical error' in the exception message, because then the phrase 'Logical error' will be printed twice." 
-# There shouldn't be any code snippets under GPL or LGPL -find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" - PATTERN="allow_"; DIFF=$(comm -3 <(grep -o "\b$PATTERN\w*\b" $ROOT_PATH/src/Core/Settings.cpp | sort -u) <(grep -o -h "\b$PATTERN\w*\b" $ROOT_PATH/src/Databases/enableAllExperimentalSettings.cpp $ROOT_PATH/utils/check-style/experimental_settings_ignore.txt | sort -u)); [ -n "$DIFF" ] && echo "$DIFF" && echo "^^ Detected 'allow_*' settings that might need to be included in src/Databases/enableAllExperimentalSettings.cpp" && echo "Alternatively, consider adding an exception to utils/check-style/experimental_settings_ignore.txt" diff --git a/ci_v2/jobs/scripts/check_style/check_submodules.sh b/ci/jobs/scripts/check_style/check_submodules.sh similarity index 100% rename from ci_v2/jobs/scripts/check_style/check_submodules.sh rename to ci/jobs/scripts/check_style/check_submodules.sh diff --git a/ci_v2/jobs/scripts/check_style/check_typos.sh b/ci/jobs/scripts/check_style/check_typos.sh similarity index 100% rename from ci_v2/jobs/scripts/check_style/check_typos.sh rename to ci/jobs/scripts/check_style/check_typos.sh diff --git a/ci_v2/jobs/scripts/check_style/checks_to_refactor.sh b/ci/jobs/scripts/check_style/checks_to_refactor.sh similarity index 100% rename from ci_v2/jobs/scripts/check_style/checks_to_refactor.sh rename to ci/jobs/scripts/check_style/checks_to_refactor.sh diff --git a/ci_v2/jobs/scripts/check_style/double_whitespaces.pl b/ci/jobs/scripts/check_style/double_whitespaces.pl similarity index 100% rename from ci_v2/jobs/scripts/check_style/double_whitespaces.pl rename to ci/jobs/scripts/check_style/double_whitespaces.pl diff --git a/ci/jobs/scripts/functional_tests_results.py b/ci/jobs/scripts/functional_tests_results.py new file mode 100755 index 00000000000..5ac9d6b985d --- /dev/null +++ 
b/ci/jobs/scripts/functional_tests_results.py @@ -0,0 +1,284 @@ +import dataclasses +from typing import List + +from praktika.environment import Environment +from praktika.result import Result + +OK_SIGN = "[ OK " +FAIL_SIGN = "[ FAIL " +TIMEOUT_SIGN = "[ Timeout! " +UNKNOWN_SIGN = "[ UNKNOWN " +SKIPPED_SIGN = "[ SKIPPED " +HUNG_SIGN = "Found hung queries in processlist" +SERVER_DIED_SIGN = "Server died, terminating all processes" +SERVER_DIED_SIGN2 = "Server does not respond to health check" +DATABASE_SIGN = "Database: " + +SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] + +RETRIES_SIGN = "Some tests were restarted" + + +# def write_results(results_file, status_file, results, status): +# with open(results_file, "w", encoding="utf-8") as f: +# out = csv.writer(f, delimiter="\t") +# out.writerows(results) +# with open(status_file, "w", encoding="utf-8") as f: +# out = csv.writer(f, delimiter="\t") +# out.writerow(status) + +BROKEN_TESTS_ANALYZER_TECH_DEBT = [ + "01624_soft_constraints", + # Check after ConstantNode refactoring + "02944_variant_as_common_type", +] + + +class FTResultsProcessor: + @dataclasses.dataclass + class Summary: + total: int + skipped: int + unknown: int + failed: int + success: int + test_results: List[Result] + hung: bool = False + server_died: bool = False + retries: bool = False + success_finish: bool = False + test_end: bool = True + + def __init__(self, wd): + self.tests_output_file = f"{wd}/test_result.txt" + # self.test_results_parsed_file = f"{wd}/test_result.tsv" + # self.status_file = f"{wd}/check_status.tsv" + self.broken_tests = BROKEN_TESTS_ANALYZER_TECH_DEBT + + def _process_test_output(self): + total = 0 + skipped = 0 + unknown = 0 + failed = 0 + success = 0 + hung = False + server_died = False + retries = False + success_finish = False + test_results = [] + test_end = True + + with open(self.tests_output_file, "r", encoding="utf-8") as test_file: + for line in test_file: + original_line = line + line = 
line.strip() + + if any(s in line for s in SUCCESS_FINISH_SIGNS): + success_finish = True + # Ignore hung check report, since it may be quite large. + # (and may break python parser which has limit of 128KiB for each row). + if HUNG_SIGN in line: + hung = True + break + if SERVER_DIED_SIGN in line or SERVER_DIED_SIGN2 in line: + server_died = True + if RETRIES_SIGN in line: + retries = True + if any( + sign in line + for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN) + ): + test_name = line.split(" ")[2].split(":")[0] + + test_time = "" + try: + time_token = line.split("]")[1].strip().split()[0] + float(time_token) + test_time = time_token + except: + pass + + total += 1 + if TIMEOUT_SIGN in line: + if test_name in self.broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "Timeout", test_time, [])) + elif FAIL_SIGN in line: + if test_name in self.broken_tests: + success += 1 + test_results.append((test_name, "BROKEN", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) + elif UNKNOWN_SIGN in line: + unknown += 1 + test_results.append((test_name, "FAIL", test_time, [])) + elif SKIPPED_SIGN in line: + skipped += 1 + test_results.append((test_name, "SKIPPED", test_time, [])) + else: + if OK_SIGN in line and test_name in self.broken_tests: + skipped += 1 + test_results.append( + ( + test_name, + "NOT_FAILED", + test_time, + [ + "This test passed. Update analyzer_tech_debt.txt.\n" + ], + ) + ) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) + test_end = False + elif ( + len(test_results) > 0 + and test_results[-1][1] == "FAIL" + and not test_end + ): + test_results[-1][3].append(original_line) + # Database printed after everything else in case of failures, + # so this is a stop marker for capturing test output. 
+ # + # And it is handled after everything else to include line with database into the report. + if DATABASE_SIGN in line: + test_end = True + + test_results = [ + Result( + name=test[0], + status=test[1], + start_time=None, + duration=float(test[2]), + info="".join(test[3])[:8192], + ) + for test in test_results + ] + + s = self.Summary( + total=total, + skipped=skipped, + unknown=unknown, + failed=failed, + success=success, + test_results=test_results, + hung=hung, + server_died=server_died, + success_finish=success_finish, + retries=retries, + ) + + return s + + def run(self): + state = Result.Status.SUCCESS + s = self._process_test_output() + test_results = s.test_results + + # # Check test_results.tsv for sanitizer asserts, crashes and other critical errors. + # # If the file is present, it's expected to be generated by stress_test.lib check for critical errors + # # In the end this file will be fully regenerated, including both results from critical errors check and + # # functional test results. + # if test_results_path and os.path.exists(test_results_path): + # with open(test_results_path, "r", encoding="utf-8") as test_results_file: + # existing_test_results = list( + # csv.reader(test_results_file, delimiter="\t") + # ) + # for test in existing_test_results: + # if len(test) < 2: + # unknown += 1 + # else: + # test_results.append(test) + # + # if test[1] != "OK": + # failed += 1 + # else: + # success += 1 + + # is_flaky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) + # logging.info("Is flaky check: %s", is_flaky_check) + # # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) + # # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped. 
+ # if failed != 0 or unknown != 0 or (success == 0 and (not is_flaky_check)): + if s.failed != 0 or s.unknown != 0: + state = Result.Status.FAILED + + if s.hung: + state = Result.Status.FAILED + test_results.append( + Result("Some queries hung", "FAIL", info="Some queries hung") + ) + elif s.server_died: + state = Result.Status.FAILED + # When ClickHouse server crashes, some tests are still running + # and fail because they cannot connect to server + for result in test_results: + if result.status == "FAIL": + result.status = "SERVER_DIED" + test_results.append(Result("Server died", "FAIL", info="Server died")) + elif not s.success_finish: + state = Result.Status.FAILED + test_results.append( + Result("Tests are not finished", "FAIL", info="Tests are not finished") + ) + elif s.retries: + test_results.append( + Result("Some tests restarted", "SKIPPED", info="Some tests restarted") + ) + else: + pass + + # TODO: !!! + # def test_result_comparator(item): + # # sort by status then by check name + # order = { + # "FAIL": 0, + # "SERVER_DIED": 1, + # "Timeout": 2, + # "NOT_FAILED": 3, + # "BROKEN": 4, + # "OK": 5, + # "SKIPPED": 6, + # } + # return order.get(item[1], 10), str(item[0]), item[1] + # + # test_results.sort(key=test_result_comparator) + + return Result.create_from( + name=Environment.JOB_NAME, + results=test_results, + status=state, + files=[self.tests_output_file], + with_info_from_results=False, + ) + + +# if __name__ == "__main__": +# logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") +# parser = argparse.ArgumentParser( +# description="ClickHouse script for parsing results of functional tests" +# ) +# +# parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") +# parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") +# args = parser.parse_args() +# +# broken_tests = [] +# state, description, test_results = process_result( +# args.in_results_dir, +# broken_tests, +# 
args.in_test_result_file, +# args.in_results_file, +# ) +# logging.info("Result parsed") +# status = (state, description) +# +# +# +# write_results(args.out_results_file, args.out_status_file, test_results, status) +# logging.info("Result written") diff --git a/ci/praktika/__init__.py b/ci/praktika/__init__.py new file mode 100644 index 00000000000..bde8fd6066a --- /dev/null +++ b/ci/praktika/__init__.py @@ -0,0 +1,5 @@ +from .artifact import Artifact +from .docker import Docker +from .job import Job +from .secret import Secret +from .workflow import Workflow diff --git a/ci/praktika/__main__.py b/ci/praktika/__main__.py new file mode 100644 index 00000000000..7f472ecd9ae --- /dev/null +++ b/ci/praktika/__main__.py @@ -0,0 +1,94 @@ +import argparse +import sys + +from praktika.html_prepare import Html +from praktika.utils import Utils +from praktika.validator import Validator +from praktika.yaml_generator import YamlGenerator + + +def create_parser(): + parser = argparse.ArgumentParser(prog="python3 -m praktika") + + subparsers = parser.add_subparsers(dest="command", help="Available subcommands") + + run_parser = subparsers.add_parser("run", help="Job Runner") + run_parser.add_argument("--job", help="Job Name", type=str, required=True) + run_parser.add_argument( + "--workflow", + help="Workflow Name (required if job name is not uniq per config)", + type=str, + default="", + ) + run_parser.add_argument( + "--no-docker", + help="Do not run job in docker even if job config says so, for local test", + action="store_true", + ) + run_parser.add_argument( + "--docker", + help="Custom docker image for job run, for local test", + type=str, + default="", + ) + run_parser.add_argument( + "--param", + help="Custom parameter to pass into a job script, it's up to job script how to use it, for local test", + type=str, + default=None, + ) + run_parser.add_argument( + "--ci", + help="When not set - dummy env will be generated, for local test", + action="store_true", + default="", + 
) + + _yaml_parser = subparsers.add_parser("yaml", help="Generates Yaml Workflows") + + _html_parser = subparsers.add_parser("html", help="Uploads HTML page for reports") + + return parser + + +if __name__ == "__main__": + parser = create_parser() + args = parser.parse_args() + + if args.command == "yaml": + Validator().validate() + YamlGenerator().generate() + elif args.command == "html": + Html.prepare() + elif args.command == "run": + from praktika.mangle import _get_workflows + from praktika.runner import Runner + + workflows = _get_workflows(name=args.workflow or None) + job_workflow_pairs = [] + for workflow in workflows: + job = workflow.find_job(args.job, lazy=True) + if job: + job_workflow_pairs.append((job, workflow)) + if not job_workflow_pairs: + Utils.raise_with_error( + f"Failed to find job [{args.job}] workflow [{args.workflow}]" + ) + elif len(job_workflow_pairs) > 1: + Utils.raise_with_error( + f"More than one job [{args.job}] found - try specifying workflow name with --workflow" + ) + else: + job, workflow = job_workflow_pairs[0][0], job_workflow_pairs[0][1] + print(f"Going to run job [{job.name}], workflow [{workflow.name}]") + Runner().run( + workflow=workflow, + job=job, + docker=args.docker, + dummy_env=not args.ci, + no_docker=args.no_docker, + param=args.param, + ) + else: + parser.print_help() + sys.exit(1) diff --git a/ci/praktika/_environment.py b/ci/praktika/_environment.py new file mode 100644 index 00000000000..ca84def1d29 --- /dev/null +++ b/ci/praktika/_environment.py @@ -0,0 +1,195 @@ +import dataclasses +import json +import os +from pathlib import Path +from types import SimpleNamespace +from typing import Any, Dict, List, Type + +from praktika import Workflow +from praktika._settings import _Settings +from praktika.utils import MetaClasses, T + + +@dataclasses.dataclass +class _Environment(MetaClasses.Serializable): + WORKFLOW_NAME: str + JOB_NAME: str + REPOSITORY: str + BRANCH: str + SHA: str + PR_NUMBER: int + EVENT_TYPE: str + 
JOB_OUTPUT_STREAM: str + EVENT_FILE_PATH: str + CHANGE_URL: str + COMMIT_URL: str + BASE_BRANCH: str + RUN_ID: str + RUN_URL: str + INSTANCE_TYPE: str + INSTANCE_ID: str + INSTANCE_LIFE_CYCLE: str + PARAMETER: Any = None + REPORT_INFO: List[str] = dataclasses.field(default_factory=list) + LOCAL_RUN_PARAM: str = "" + name = "environment" + + @classmethod + def file_name_static(cls, _name=""): + return f"{_Settings.TEMP_DIR}/{cls.name}.json" + + @classmethod + def from_dict(cls: Type[T], obj: Dict[str, Any]) -> T: + JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "") + obj["JOB_OUTPUT_STREAM"] = JOB_OUTPUT_STREAM + if "PARAMETER" in obj: + obj["PARAMETER"] = _to_object(obj["PARAMETER"]) + return cls(**obj) + + def add_info(self, info): + self.REPORT_INFO.append(info) + self.dump() + + @classmethod + def get(cls): + if Path(cls.file_name_static()).is_file(): + return cls.from_fs("environment") + else: + print("WARNING: Environment: get from env") + env = cls.from_env() + env.dump() + return env + + def set_job_name(self, job_name): + self.JOB_NAME = job_name + self.dump() + return self + + @staticmethod + def get_needs_statuses(): + if Path(_Settings.WORKFLOW_STATUS_FILE).is_file(): + with open(_Settings.WORKFLOW_STATUS_FILE, "r", encoding="utf8") as f: + return json.load(f) + else: + print( + f"ERROR: Status file [{_Settings.WORKFLOW_STATUS_FILE}] does not exist" + ) + raise RuntimeError() + + @classmethod + def from_env(cls) -> "_Environment": + WORKFLOW_NAME = os.getenv("GITHUB_WORKFLOW", "") + JOB_NAME = os.getenv("JOB_NAME", "") + REPOSITORY = os.getenv("GITHUB_REPOSITORY", "") + BRANCH = os.getenv("GITHUB_HEAD_REF", "") + + EVENT_FILE_PATH = os.getenv("GITHUB_EVENT_PATH", "") + JOB_OUTPUT_STREAM = os.getenv("GITHUB_OUTPUT", "") + RUN_ID = os.getenv("GITHUB_RUN_ID", "0") + RUN_URL = f"https://github.com/{REPOSITORY}/actions/runs/{RUN_ID}" + BASE_BRANCH = os.getenv("GITHUB_BASE_REF", "") + + if EVENT_FILE_PATH: + with open(EVENT_FILE_PATH, "r", encoding="utf-8") as 
f: + github_event = json.load(f) + if "pull_request" in github_event: + EVENT_TYPE = Workflow.Event.PULL_REQUEST + PR_NUMBER = github_event["pull_request"]["number"] + SHA = github_event["pull_request"]["head"]["sha"] + CHANGE_URL = github_event["pull_request"]["html_url"] + COMMIT_URL = CHANGE_URL + f"/commits/{SHA}" + elif "commits" in github_event: + EVENT_TYPE = Workflow.Event.PUSH + SHA = github_event["after"] + CHANGE_URL = github_event["head_commit"]["url"] # commit url + PR_NUMBER = 0 + COMMIT_URL = CHANGE_URL + else: + assert False, "TODO: not supported" + else: + print("WARNING: Local execution - dummy Environment will be generated") + SHA = "TEST" + PR_NUMBER = -1 + EVENT_TYPE = Workflow.Event.PUSH + CHANGE_URL = "" + COMMIT_URL = "" + + INSTANCE_TYPE = ( + os.getenv("INSTANCE_TYPE", None) + # or Shell.get_output("ec2metadata --instance-type") + or "" + ) + INSTANCE_ID = ( + os.getenv("INSTANCE_ID", None) + # or Shell.get_output("ec2metadata --instance-id") + or "" + ) + INSTANCE_LIFE_CYCLE = ( + os.getenv("INSTANCE_LIFE_CYCLE", None) + # or Shell.get_output( + # "curl -s --fail http://169.254.169.254/latest/meta-data/instance-life-cycle" + # ) + or "" + ) + + return _Environment( + WORKFLOW_NAME=WORKFLOW_NAME, + JOB_NAME=JOB_NAME, + REPOSITORY=REPOSITORY, + BRANCH=BRANCH, + EVENT_FILE_PATH=EVENT_FILE_PATH, + JOB_OUTPUT_STREAM=JOB_OUTPUT_STREAM, + SHA=SHA, + EVENT_TYPE=EVENT_TYPE, + PR_NUMBER=PR_NUMBER, + RUN_ID=RUN_ID, + CHANGE_URL=CHANGE_URL, + COMMIT_URL=COMMIT_URL, + RUN_URL=RUN_URL, + BASE_BRANCH=BASE_BRANCH, + INSTANCE_TYPE=INSTANCE_TYPE, + INSTANCE_ID=INSTANCE_ID, + INSTANCE_LIFE_CYCLE=INSTANCE_LIFE_CYCLE, + REPORT_INFO=[], + ) + + def get_s3_prefix(self, latest=False): + return self.get_s3_prefix_static(self.PR_NUMBER, self.BRANCH, self.SHA, latest) + + @classmethod + def get_s3_prefix_static(cls, pr_number, branch, sha, latest=False): + prefix = "" + if pr_number > 0: + prefix += f"{pr_number}" + else: + prefix += f"{branch}" + if latest: + 
prefix += f"/latest" + elif sha: + prefix += f"/{sha}" + return prefix + + # TODO: find a better place for the function. This file should not import praktika.settings + # as it's requires reading users config, that's why imports nested inside the function + def get_report_url(self): + import urllib + + from praktika.settings import Settings + from praktika.utils import Utils + + path = Settings.HTML_S3_PATH + for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items(): + if bucket in path: + path = path.replace(bucket, endpoint) + break + REPORT_URL = f"https://{path}/{Path(Settings.HTML_PAGE_FILE).name}?PR={self.PR_NUMBER}&sha={self.SHA}&name_0={urllib.parse.quote(self.WORKFLOW_NAME, safe='')}&name_1={urllib.parse.quote(self.JOB_NAME, safe='')}" + return REPORT_URL + + +def _to_object(data): + if isinstance(data, dict): + return SimpleNamespace(**{k: _to_object(v) for k, v in data.items()}) + elif isinstance(data, list): + return [_to_object(i) for i in data] + else: + return data diff --git a/ci/praktika/_settings.py b/ci/praktika/_settings.py new file mode 100644 index 00000000000..bfd7ba6c1be --- /dev/null +++ b/ci/praktika/_settings.py @@ -0,0 +1,128 @@ +import dataclasses +from pathlib import Path +from typing import Dict, Iterable, List, Optional + + +@dataclasses.dataclass +class _Settings: + ###################################### + # Pipeline generation settings # + ###################################### + if Path("./ci_v2").is_dir(): + # TODO: hack for CH, remove + CI_PATH = "./ci_v2" + else: + CI_PATH = "./ci" + WORKFLOW_PATH_PREFIX: str = "./.github/workflows" + WORKFLOWS_DIRECTORY: str = f"{CI_PATH}/workflows" + SETTINGS_DIRECTORY: str = f"{CI_PATH}/settings" + CI_CONFIG_JOB_NAME = "Config Workflow" + DOCKER_BUILD_JOB_NAME = "Docker Builds" + FINISH_WORKFLOW_JOB_NAME = "Finish Workflow" + READY_FOR_MERGE_STATUS_NAME = "Ready for Merge" + CI_CONFIG_RUNS_ON: Optional[List[str]] = None + DOCKER_BUILD_RUNS_ON: Optional[List[str]] = None + 
VALIDATE_FILE_PATHS: bool = True + + ###################################### + # Runtime Settings # + ###################################### + MAX_RETRIES_S3 = 3 + MAX_RETRIES_GH = 3 + + ###################################### + # S3 (artifact storage) settings # + ###################################### + S3_ARTIFACT_PATH: str = "" + + ###################################### + # CI workspace settings # + ###################################### + TEMP_DIR: str = "/tmp/praktika" + OUTPUT_DIR: str = f"{TEMP_DIR}/output" + INPUT_DIR: str = f"{TEMP_DIR}/input" + PYTHON_INTERPRETER: str = "python3" + PYTHON_PACKET_MANAGER: str = "pip3" + PYTHON_VERSION: str = "3.9" + INSTALL_PYTHON_FOR_NATIVE_JOBS: bool = False + INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS: str = "./ci/requirements.txt" + ENVIRONMENT_VAR_FILE: str = f"{TEMP_DIR}/environment.json" + RUN_LOG: str = f"{TEMP_DIR}/praktika_run.log" + + SECRET_GH_APP_ID: str = "GH_APP_ID" + SECRET_GH_APP_PEM_KEY: str = "GH_APP_PEM_KEY" + + ENV_SETUP_SCRIPT: str = "/tmp/praktika_setup_env.sh" + WORKFLOW_STATUS_FILE: str = f"{TEMP_DIR}/workflow_status.json" + + ###################################### + # CI Cache settings # + ###################################### + CACHE_VERSION: int = 1 + CACHE_DIGEST_LEN: int = 20 + CACHE_S3_PATH: str = "" + CACHE_LOCAL_PATH: str = f"{TEMP_DIR}/ci_cache" + + ###################################### + # Report settings # + ###################################### + HTML_S3_PATH: str = "" + HTML_PAGE_FILE: str = "./praktika/json.html" + TEXT_CONTENT_EXTENSIONS: Iterable[str] = frozenset([".txt", ".log"]) + S3_BUCKET_TO_HTTP_ENDPOINT: Optional[Dict[str, str]] = None + + DOCKERHUB_USERNAME: str = "" + DOCKERHUB_SECRET: str = "" + DOCKER_WD: str = "/wd" + + ###################################### + # CI DB Settings # + ###################################### + SECRET_CI_DB_URL: str = "CI_DB_URL" + SECRET_CI_DB_PASSWORD: str = "CI_DB_PASSWORD" + CI_DB_DB_NAME = "" + CI_DB_TABLE_NAME = "" + CI_DB_INSERT_TIMEOUT_SEC = 
5 + + +_USER_DEFINED_SETTINGS = [ + "S3_ARTIFACT_PATH", + "CACHE_S3_PATH", + "HTML_S3_PATH", + "S3_BUCKET_TO_HTTP_ENDPOINT", + "TEXT_CONTENT_EXTENSIONS", + "TEMP_DIR", + "OUTPUT_DIR", + "INPUT_DIR", + "CI_CONFIG_RUNS_ON", + "DOCKER_BUILD_RUNS_ON", + "CI_CONFIG_JOB_NAME", + "PYTHON_INTERPRETER", + "PYTHON_VERSION", + "PYTHON_PACKET_MANAGER", + "INSTALL_PYTHON_FOR_NATIVE_JOBS", + "INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS", + "MAX_RETRIES_S3", + "MAX_RETRIES_GH", + "VALIDATE_FILE_PATHS", + "DOCKERHUB_USERNAME", + "DOCKERHUB_SECRET", + "READY_FOR_MERGE_STATUS_NAME", + "SECRET_CI_DB_URL", + "SECRET_CI_DB_PASSWORD", + "CI_DB_DB_NAME", + "CI_DB_TABLE_NAME", + "CI_DB_INSERT_TIMEOUT_SEC", + "SECRET_GH_APP_PEM_KEY", + "SECRET_GH_APP_ID", +] + + +class GHRunners: + ubuntu = "ubuntu-latest" + + +if __name__ == "__main__": + for setting in _USER_DEFINED_SETTINGS: + print(_Settings().__getattribute__(setting)) + # print(dataclasses.asdict(_Settings())) diff --git a/ci/praktika/artifact.py b/ci/praktika/artifact.py new file mode 100644 index 00000000000..ba05f18b9b1 --- /dev/null +++ b/ci/praktika/artifact.py @@ -0,0 +1,33 @@ +from dataclasses import dataclass + + +class Artifact: + class Type: + GH = "github" + S3 = "s3" + PHONY = "phony" + + @dataclass + class Config: + """ + name - artifact name + type - artifact type, see Artifact.Type + path - file path or glob, e.g. 
"path/**/[abc]rtifac?/*" + """ + + name: str + type: str + path: str + _provided_by: str = "" + _s3_path: str = "" + + def is_s3_artifact(self): + return self.type == Artifact.Type.S3 + + @classmethod + def define_artifact(cls, name, type, path): + return cls.Config(name=name, type=type, path=path) + + @classmethod + def define_gh_artifact(cls, name, path): + return cls.define_artifact(name=name, type=cls.Type.GH, path=path) diff --git a/ci/praktika/cache.py b/ci/praktika/cache.py new file mode 100644 index 00000000000..cbaea9b489b --- /dev/null +++ b/ci/praktika/cache.py @@ -0,0 +1,127 @@ +import dataclasses +import json +from pathlib import Path + +from praktika import Artifact, Job, Workflow +from praktika._environment import _Environment +from praktika.digest import Digest +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Utils + + +class Cache: + @dataclasses.dataclass + class CacheRecord: + class Type: + SUCCESS = "success" + + type: str + sha: str + pr_number: int + branch: str + + def dump(self, path): + with open(path, "w", encoding="utf8") as f: + json.dump(dataclasses.asdict(self), f) + + @classmethod + def from_fs(cls, path): + with open(path, "r", encoding="utf8") as f: + return Cache.CacheRecord(**json.load(f)) + + @classmethod + def from_dict(cls, obj): + return Cache.CacheRecord(**obj) + + def __init__(self): + self.digest = Digest() + self.success = {} # type Dict[str, Any] + + @classmethod + def push_success_record(cls, job_name, job_digest, sha): + type_ = Cache.CacheRecord.Type.SUCCESS + record = Cache.CacheRecord( + type=type_, + sha=sha, + pr_number=_Environment.get().PR_NUMBER, + branch=_Environment.get().BRANCH, + ) + assert ( + Settings.CACHE_S3_PATH + ), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache" + record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}" + record_file = Path(Settings.TEMP_DIR) / type_ + 
record.dump(record_file) + S3.copy_file_to_s3(s3_path=record_path, local_path=record_file) + record_file.unlink() + + def fetch_success(self, job_name, job_digest): + type_ = Cache.CacheRecord.Type.SUCCESS + assert ( + Settings.CACHE_S3_PATH + ), f"Setting CACHE_S3_PATH must be defined with enabled CI Cache" + record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}/{type_}" + record_file_local_dir = ( + f"{Settings.CACHE_LOCAL_PATH}/{Utils.normalize_string(job_name)}/" + ) + Path(record_file_local_dir).mkdir(parents=True, exist_ok=True) + + if S3.head_object(record_path): + res = S3.copy_file_from_s3( + s3_path=record_path, local_path=record_file_local_dir + ) + else: + res = None + + if res: + print(f"Cache record found, job [{job_name}], digest [{job_digest}]") + self.success[job_name] = True + return Cache.CacheRecord.from_fs(Path(record_file_local_dir) / type_) + return None + + +if __name__ == "__main__": + # test + c = Cache() + workflow = Workflow.Config( + name="TEST", + event=Workflow.Event.PULL_REQUEST, + jobs=[ + Job.Config( + name="JobA", + runs_on=["some"], + command="python -m unittest ./ci/tests/example_1/test_example_produce_artifact.py", + provides=["greet"], + job_requirements=Job.Requirements( + python_requirements_txt="./ci/requirements.txt" + ), + digest_config=Job.CacheDigestConfig( + # example: use glob to include files + include_paths=["./ci/tests/example_1/test_example_consume*.py"], + ), + ), + Job.Config( + name="JobB", + runs_on=["some"], + command="python -m unittest ./ci/tests/example_1/test_example_consume_artifact.py", + requires=["greet"], + job_requirements=Job.Requirements( + python_requirements_txt="./ci/requirements.txt" + ), + digest_config=Job.CacheDigestConfig( + # example: use dir to include files recursively + include_paths=["./ci/tests/example_1"], + # example: use glob to exclude files from digest + exclude_paths=[ + "./ci/tests/example_1/test_example_consume*", + 
class CIDB:
    """
    Client that writes CI results into the CI database over HTTP.

    One JSON row is produced for the job-level result and one more per sub-result;
    rows are posted with an `INSERT ... FORMAT JSONEachRow` query.
    """

    @dataclasses.dataclass
    class TableRecord:
        # One row of the CI DB table; field names are the JSON keys sent on insert
        pull_request_number: int
        commit_sha: str
        commit_url: str
        check_name: str
        check_status: str
        check_duration_ms: int
        check_start_time: int
        report_url: str
        pull_request_url: str
        base_ref: str
        base_repo: str
        head_ref: str
        head_repo: str
        task_url: str
        instance_type: str
        instance_id: str
        test_name: str
        test_status: str
        test_duration_ms: Optional[int]
        test_context_raw: str

    def __init__(self, url, passwd):
        """
        :param url: HTTP endpoint all requests are posted to
        :param passwd: value sent in the X-ClickHouse-Key header (user is "default")
        """
        self.url = url
        self.auth = {
            "X-ClickHouse-User": "default",
            "X-ClickHouse-Key": passwd,
        }

    @classmethod
    def json_data_generator(cls, result: "Result"):
        """
        Yield JSON strings for :param result: - first the job-level row, then one row
        per entry of result.results.

        :param result: job Result; its sub-results become the per-test rows
        :return: generator of JSON-encoded TableRecord dicts
        """
        env = _Environment.get()
        base_record = cls.TableRecord(
            pull_request_number=env.PR_NUMBER,
            commit_sha=env.SHA,
            commit_url=env.COMMIT_URL,
            check_name=result.name,
            check_status=result.status,
            check_duration_ms=int(result.duration * 1000),
            check_start_time=Utils.timestamp_to_str(result.start_time),
            report_url=env.get_report_url(),
            pull_request_url=env.CHANGE_URL,
            base_ref=env.BASE_BRANCH,
            base_repo=env.REPOSITORY,
            head_ref=env.BRANCH,
            # TODO: remove from table?
            head_repo=env.REPOSITORY,
            # TODO: remove from table?
            task_url="",
            instance_type=",".join([env.INSTANCE_TYPE, env.INSTANCE_LIFE_CYCLE]),
            instance_id=env.INSTANCE_ID,
            test_name="",
            test_status="",
            test_duration_ms=None,
            test_context_raw=result.info,
        )
        yield json.dumps(dataclasses.asdict(base_record))
        for result_ in result.results:
            record = copy.deepcopy(base_record)
            record.test_name = result_.name
            if result_.start_time:
                # Use the sub-result's own start time and store it as a plain value:
                # a trailing comma here would turn it into a 1-tuple (JSON list)
                record.check_start_time = Utils.timestamp_to_str(result_.start_time)
            record.test_status = result_.status
            record.test_duration_ms = int(result_.duration * 1000)
            record.test_context_raw = result_.info
            yield json.dumps(dataclasses.asdict(record))

    def insert(self, result: "Result"):
        """
        Insert all rows generated for :param result: into the CI DB table.

        Request exceptions propagate to the caller; a non-OK HTTP response is
        reported to stdout instead of being silently ignored.
        """
        params = {
            "database": Settings.CI_DB_DB_NAME,
            "query": f"INSERT INTO {Settings.CI_DB_TABLE_NAME} FORMAT JSONEachRow",
            "date_time_input_format": "best_effort",
            "send_logs_level": "warning",
        }

        # the context manager closes the session even if a request raises
        with requests.Session() as session:
            for json_str in self.json_data_generator(result):
                response = session.post(
                    url=self.url,
                    params=params,
                    data=json_str,
                    headers=self.auth,
                    timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
                )
                if not response.ok:
                    print(
                        f"ERROR: Failed to insert record into CI DB [{response.status_code}/{response.reason}]"
                    )

    def check(self):
        """
        Smoke-test the connection with `SELECT 1`.

        :return: tuple (ok, error_message); error_message is empty on success
        """
        params = {
            "database": Settings.CI_DB_DB_NAME,
            "query": "SELECT 1",
        }
        try:
            response = requests.post(
                url=self.url,
                params=params,
                data="",
                headers=self.auth,
                timeout=Settings.CI_DB_INSERT_TIMEOUT_SEC,
            )
            if not response.ok:
                print("ERROR: No connection to CI DB")
                return (
                    False,
                    f"ERROR: No connection to CI DB [{response.status_code}/{response.reason}]",
                )
            if not response.json() == 1:
                print("ERROR: CI DB smoke test failed select 1 == 1")
                return (
                    False,
                    f"ERROR: CI DB smoke test failed [select 1 ==> {response.json()}]",
                )
        except Exception as ex:
            print(f"ERROR: Exception [{ex}]")
            # f-string so that the actual exception text reaches the caller
            return False, f"CIDB: ERROR: Exception [{ex}]"
        return True, ""
def calc_job_digest(self, job_config: Job.Config):
    """
    Return the cache digest for a job, memoized per digest config.

    :param job_config: job whose digest_config lists the include/exclude paths
    :return: digest string; a dummy digest made of "f" characters when the job has
        no digest config or the config matches no files
    """
    digest_config = job_config.digest_config
    if not digest_config:
        return "f" * Settings.CACHE_DIGEST_LEN

    # identical digest configs share one cache entry
    key = self._hash_digest_config(digest_config)
    try:
        return self.digest_cache[key]
    except KeyError:
        pass

    files = Utils.traverse_paths(
        digest_config.include_paths,
        digest_config.exclude_paths,
        sorted=True,
    )

    print(f"calc digest: hash_key [{key}], include [{files}] files")
    # Sort files to ensure consistent hash calculation
    files.sort()

    if files:
        # every file is fed into the same hash object, so the value returned for
        # the last file is the digest of all of them
        hasher = hashlib.md5()
        digest = ""
        for path in files:
            digest = self._calc_file_digest(path, hasher)
    else:
        digest = "f" * Settings.CACHE_DIGEST_LEN
        print(f"NOTE: empty digest config [{digest_config}] - return dummy digest")

    assert digest
    self.digest_cache[key] = digest
    return digest
class Docker:
    """Docker image configuration plus helpers to build, order and push images."""

    class Platforms:
        ARM = "linux/arm64"
        AMD = "linux/amd64"
        arm_amd = [ARM, AMD]

    @dataclasses.dataclass
    class Config:
        # image name, also used as the registry reference for tags and build cache
        name: str
        # build context path passed to `docker buildx build`
        path: str
        # names of images this image is built FROM
        depends_on: List[str]
        # target platforms, see Docker.Platforms
        platforms: List[str]

    @classmethod
    def build(cls, config: "Docker.Config", log_file, digests, add_latest):
        """
        Build and push the image described by :param config:.

        :param config: Docker.Config to build
        :param log_file: passed through to Shell.run
        :param digests: mapping image name -> digest, used as image tag
        :param add_latest: additionally tag the image as `latest`
        :return: result of Shell.run for the build command
        """
        tags_substr = f" -t {config.name}:{digests[config.name]}"
        if add_latest:
            # add the tag: the digest tag must stay, dependent images reference it via FROM_TAG
            tags_substr += f" -t {config.name}:latest"

        from_tag = ""
        if config.depends_on:
            assert (
                len(config.depends_on) == 1
            ), f"Only one dependency in depends_on is currently supported, docker [{config}]"
            from_tag = f" --build-arg FROM_TAG={digests[config.depends_on[0]]}"

        command = f"docker buildx build --platform {','.join(config.platforms)} {tags_substr} {from_tag} --cache-to type=inline --cache-from type=registry,ref={config.name} --push {config.path}"
        return Shell.run(command, log_file=log_file, verbose=True)

    @classmethod
    def sort_in_build_order(cls, dockers: List["Docker.Config"]):
        """
        Reorder :param dockers: in place so that every image comes after the images
        it depends on.

        :param dockers: list of Docker.Config, modified in place
        :return: the same list object, sorted in build order
        :raises ValueError: if some dependency is missing from the list or the
            dependencies are cyclic (previously this looped forever)
        """
        ready_names = set()
        i = 0
        # number of consecutive postponements since the last image became ready
        stalled = 0
        while i < len(dockers):
            docker = dockers[i]
            if not docker.depends_on or all(
                dep in ready_names for dep in docker.depends_on
            ):
                ready_names.add(docker.name)
                i += 1
                stalled = 0
            else:
                stalled += 1
                if stalled >= len(dockers) - i:
                    # every remaining image was tried once and none can be built
                    unresolved = [d.name for d in dockers[i:]]
                    raise ValueError(
                        f"Cannot resolve docker build order, unsatisfied dependencies for {unresolved}"
                    )
                # postpone: move the image to the end and retry it later
                dockers.append(dockers.pop(i))
        return dockers

    @classmethod
    def login(cls, user_name, user_password):
        """
        Log in to dockerhub; the password is passed via stdin.

        :return: result of Shell.check
        """
        print("Docker: log in to dockerhub")
        return Shell.check(
            f"docker login --username '{user_name}' --password-stdin",
            strict=True,
            stdin_str=user_password,
            encoding="utf-8",
            verbose=True,
        )
class StateMachine:
    """
    Drives a runner machine through the INIT -> WAIT -> RUN -> INIT cycle and
    applies the configured scaling policy.
    """

    class StateNames:
        INIT = "init"
        WAIT = "wait"
        RUN = "run"

    def __init__(self):
        self.state = self.StateNames.INIT
        self.scale_type = ExecutionSettings.RUNNER_SCALING_TYPE
        self.machine = Machine(scaling_type=self.scale_type).update_instance_info()
        # timestamp (seconds) of the last state change, used for the scale-down timeout
        self.state_updated_at = int(time.time())
        # True once this instance has requested one extra instance
        self.forked = False

    def kick(self):
        """Perform one step of the state machine according to the current state."""
        if self.state == self.StateNames.INIT:
            self.machine.config_actions().run_actions_async()
            print("State Machine: INIT -> WAIT")
            self.state = self.StateNames.WAIT
            self.state_updated_at = int(time.time())
            # TODO: add monitoring
            if not self.machine.is_actions_process_healthy():
                print(f"ERROR: GH runner process unexpectedly died")
                self.machine.self_terminate(decrease_capacity=False)
        elif self.state == self.StateNames.WAIT:
            if self.machine.check_job_assigned():
                print("State Machine: WAIT -> RUN")
                self.state = self.StateNames.RUN
                self.state_updated_at = int(time.time())
                self.check_scale_up()
            else:
                self.check_scale_down()
        elif self.state == self.StateNames.RUN:
            if not self.machine.check_job_running():
                print("State Machine: RUN -> INIT")
                self.state = self.StateNames.INIT
                self.state_updated_at = int(time.time())

    def check_scale_down(self):
        """
        Terminate this instance (decreasing capacity) when no job was assigned for
        longer than MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC and the scaling type allows it.
        """
        if self.scale_type not in (
            ScalingType.AUTOMATIC_SCALE_DOWN,
            ScalingType.AUTOMATIC_SCALE_UP_DOWN,
        ):
            return
        # Compare the configured type: the bare constant is always truthy, which
        # blocked scale down for AUTOMATIC_SCALE_DOWN machines as well
        if (
            self.scale_type == ScalingType.AUTOMATIC_SCALE_UP_DOWN
            and not self.forked
        ):
            print(
                f"Scaling type is AUTOMATIC_SCALE_UP_DOWN and machine has not run a job - do not scale down"
            )
            return
        if (
            int(time.time()) - self.state_updated_at
            > ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC
        ):
            print(
                f"No job assigned for more than MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC [{ExecutionSettings.MAX_WAIT_TIME_BEFORE_SCALE_DOWN_SEC}] - scale down the instance"
            )
            if not ExecutionSettings.LOCAL_EXECUTION:
                self.machine.self_terminate(decrease_capacity=True)
            else:
                print("Local execution - skip scaling operation")

    def check_scale_up(self):
        """Request one more instance, at most once per machine lifetime."""
        if self.scale_type not in (ScalingType.AUTOMATIC_SCALE_UP_DOWN,):
            return
        if self.forked:
            print("This instance already forked once - do not scale up")
            return
        self.machine.self_fork()
        self.forked = True

    def run(self):
        """Remove the runner configuration, then kick the state machine every 5 seconds."""
        self.machine.unconfig_actions()
        while True:
            self.kick()
            time.sleep(5)

    def terminate(self):
        """
        Best-effort shutdown: unconfigure the runner, then terminate the instance
        (falls back to an OS shutdown). Does nothing destructive on local execution.
        """
        try:
            self.machine.unconfig_actions()
        except Exception:
            # best effort only; do not swallow KeyboardInterrupt/SystemExit
            print("WARNING: failed to unconfig runner")
        if not ExecutionSettings.LOCAL_EXECUTION:
            if self.machine is not None:
                self.machine.self_terminate(decrease_capacity=False)
                time.sleep(10)
            # wait termination
            print("ERROR: failed to terminate instance via aws cli - try os call")
            os.system("sudo shutdown now")
        else:
            print("NOTE: Local execution - machine won't be terminated")
def __init__(self, scaling_type):
    """
    Detect OS and CPU architecture and initialize empty instance info.

    :param scaling_type: one of ScalingType values
    """
    self.os_name = platform.system().lower()
    assert self.os_name == "linux", f"Unsupported OS [{self.os_name}]"

    # map the platform machine name to the arch name stored in self.arch
    machine_name = platform.machine()
    if machine_name == "x86_64":
        self.arch = "x64"
    elif "aarch64" in machine_name.lower():
        self.arch = "arm64"
    else:
        assert False, f"Unsupported arch [{machine_name}]"

    # instance details are unknown until update_instance_info() is called
    self.instance_id = self.asg_name = None
    self.runner_api_endpoint = self.runner_type = None
    self.labels = []
    self.proc = None

    assert scaling_type in ScalingType
    self.scaling_type = scaling_type
def config_actions(self):
    """
    Configure the GH actions runner via config.sh, retrying up to 10 times.

    A fresh registration token is fetched before every retry and the command is
    rebuilt with it (previously the refreshed token was discarded and the same
    command with the old token was re-run).

    :return: self
    :raises AssertionError: if required instance info is missing or all attempts fail
    """
    if not self.instance_id:
        self.update_instance_info()
    token = self._get_gh_token_from_ssm()
    assert token and self.instance_id and self.runner_api_endpoint and self.labels

    max_attempts = 10
    res = 1
    for attempt in range(1, max_attempts + 1):
        command = (
            f"sudo -u ubuntu {ExecutionSettings.GH_ACTIONS_DIRECTORY}/config.sh --token {token} "
            f"--url {self.runner_api_endpoint} --ephemeral --unattended --replace "
            f"--runnergroup Default --labels {','.join(self.labels)} --work wd --name {self.instance_id}"
        )
        res = Shell.run(command)
        if res == 0:
            break
        print(
            f"ERROR: failed to configure GH actions runner after [{attempt}] attempts, exit code [{res}], retry after 10s"
        )
        if attempt < max_attempts:
            time.sleep(10)
            # use a new token for the next attempt
            token = self._get_gh_token_from_ssm()

    if res != 0:
        # explicit raise: must fail even when asserts are disabled
        raise AssertionError("GH actions runner configuration failed")
    print("GH action runner has been configured")
    return self
def self_fork(self):
    """
    Scale up: increase the desired capacity of this machine's auto scaling group
    by one. Failures are reported to stdout and never raised.
    """
    capacity_output = Shell.get_output(
        f"aws autoscaling describe-auto-scaling-groups --auto-scaling-group-name {self.asg_name} "
        f'--query "AutoScalingGroups[0].DesiredCapacity" --output text'
    )
    # Empty or non-numeric output must lead to the handled error path below
    # instead of an unhandled ValueError from int()
    try:
        current_capacity = int(capacity_output)
    except (TypeError, ValueError):
        current_capacity = 0
    if not current_capacity:
        print("ERROR: failed to get current capacity - cannot scale up")
        return

    desired_capacity = current_capacity + 1
    command = f"aws autoscaling set-desired-capacity --auto-scaling-group-name {self.asg_name} --desired-capacity {desired_capacity}"
    print(f"Increase capacity [{current_capacity} -> {desired_capacity}]")
    res = Shell.check(
        command=command,
        verbose=True,
    )
    if not res:
        print("ERROR: failed to increase capacity - cannot scale up")
def create_png(width, height, image_data):
    """
    Encode raw pixel data as PNG file content.

    :param width: image width in pixels
    :param height: image height in pixels
    :param image_data: pixel bytes, 4 bytes per pixel, stored row by row
    :return: bytes of the PNG (signature, IHDR, IDAT and IEND chunks)
    """
    signature = b"\x89PNG\r\n\x1a\n"

    def chunk(tag, payload):
        # chunk = payload length, tag, payload, CRC32 over tag + payload
        checksum = zlib.crc32(tag + payload) & 0xFFFFFFFF
        return b"".join(
            (struct.pack(">I", len(payload)), tag, payload, struct.pack(">I", checksum))
        )

    # every row is written with a leading zero byte
    stride = width * 4
    rows = bytearray()
    for row in range(height):
        rows += b"\x00" + image_data[row * stride : (row + 1) * stride]

    header = struct.pack(">IIBBBBB", width, height, 8, 6, 0, 0, 0)
    pixels = zlib.compress(bytes(rows), 9)

    return (
        signature + chunk(b"IHDR", header) + chunk(b"IDAT", pixels) + chunk(b"IEND", b"")
    )
class CacheRunnerHooks:
    # Workflow hooks for the CI cache: calculate job digests, look up cached
    # success records and point jobs to artifacts that can be reused.

    @classmethod
    def configure(cls, _workflow):
        """
        Calculate digests for all jobs of the workflow, check the remote cache for
        success records and store the outcome in the workflow RunConfig.

        :param _workflow: workflow config; its name is used to load the RunConfig
        :return: the updated RunConfig (also dumped to fs and written to the job output)
        """
        workflow_config = RunConfig.from_fs(_workflow.name)
        cache = Cache()
        assert _Environment.get().WORKFLOW_NAME
        workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0]
        print(f"Workflow Configure, workflow [{workflow.name}]")
        assert (
            workflow.enable_cache
        ), f"Outdated yaml pipelines or BUG. Configuration must be run only for workflow with enabled cache, workflow [{workflow.name}]"
        # artifact name -> digest of the job that provides it
        artifact_digest_map = {}
        # job name -> digest of the job itself
        job_digest_map = {}
        for job in workflow.jobs:
            if not job.digest_config:
                print(
                    f"NOTE: job [{job.name}] has no Config.digest_config - skip cache check, always run"
                )
            digest = cache.digest.calc_job_digest(job_config=job)
            job_digest_map[job.name] = digest
            if job.provides:
                # assign the job digest also to the artifacts it provides
                for artifact in job.provides:
                    artifact_digest_map[artifact] = digest
        # second pass: all artifact digests are known now, build the final digests
        for job in workflow.jobs:
            digests_combined_list = []
            if job.requires:
                # include digest of required artifact to the job digest, so that they affect job state
                for artifact_name in job.requires:
                    if artifact_name not in [
                        artifact.name for artifact in workflow.artifacts
                    ]:
                        # phony artifact assumed to be not affecting jobs that depend on it
                        continue
                    digests_combined_list.append(artifact_digest_map[artifact_name])
            # own digest goes last, parts are joined with "-"
            digests_combined_list.append(job_digest_map[job.name])
            final_digest = "-".join(digests_combined_list)
            workflow_config.digest_jobs[job.name] = final_digest

        assert (
            workflow_config.digest_jobs
        ), f"BUG, Workflow with enabled cache must have job digests after configuration, wf [{workflow.name}]"

        print("Check remote cache")
        # job name -> cache record of its previous successful run
        job_to_cache_record = {}
        for job_name, job_digest in workflow_config.digest_jobs.items():
            record = cache.fetch_success(job_name=job_name, job_digest=job_digest)
            if record:
                assert (
                    Utils.normalize_string(job_name)
                    not in workflow_config.cache_success
                )
                workflow_config.cache_success.append(job_name)
                workflow_config.cache_success_base64.append(Utils.to_base64(job_name))
                job_to_cache_record[job_name] = record

        print("Check artifacts to reuse")
        for job in workflow.jobs:
            if job.name in workflow_config.cache_success:
                if job.provides:
                    # artifacts of a cached job are taken from the run described by its record
                    for artifact_name in job.provides:
                        workflow_config.cache_artifacts[artifact_name] = (
                            job_to_cache_record[job.name]
                        )

        print(f"Write config to GH's job output")
        with open(_Environment.get().JOB_OUTPUT_STREAM, "a", encoding="utf8") as f:
            print(
                f"DATA={workflow_config.to_json()}",
                file=f,
            )
        print(f"WorkflowRuntimeConfig: [{workflow_config.to_json(pretty=True)}]")
        print(
            "Dump WorkflowConfig to fs, the next hooks in this job might want to see it"
        )
        workflow_config.dump()

        return workflow_config

    @classmethod
    def pre_run(cls, _workflow, _job, _required_artifacts=None):
        """
        Resolve S3 prefixes for the artifacts required by the job.

        :param _workflow: workflow config; its name is used to load the RunConfig
        :param _job: job config
        :param _required_artifacts: artifacts the job requires, may be None
        :return: list of S3 path prefixes, one per required artifact, in the same
            order; empty list for the CI config job
        """
        path_prefixes = []
        if _job.name == Settings.CI_CONFIG_JOB_NAME:
            # SPECIAL handling
            return path_prefixes
        env = _Environment.get()
        runtime_config = RunConfig.from_fs(_workflow.name)
        required_artifacts = []
        if _required_artifacts:
            required_artifacts = _required_artifacts
        for artifact in required_artifacts:
            if artifact.name in runtime_config.cache_artifacts:
                # artifact is reused from cache: take the prefix from its cache record
                record = runtime_config.cache_artifacts[artifact.name]
                print(f"Reuse artifact [{artifact.name}] from [{record}]")
                path_prefixes.append(
                    env.get_s3_prefix_static(
                        record.pr_number, record.branch, record.sha
                    )
                )
            else:
                # not cached: use the prefix of the current environment
                path_prefixes.append(env.get_s3_prefix())
        return path_prefixes

    @classmethod
    def run(cls, workflow, job):
        # nothing to do in the run step
        pass

    @classmethod
    def post_run(cls, workflow, job):
        """
        Push a success record to the cache for a job that has a digest config.
        Does nothing for the CI config job.
        """
        if job.name == Settings.CI_CONFIG_JOB_NAME:
            return
        if job.digest_config:
            # cache is enabled, and it's a job that supposed to be cached (has defined digest config)
            workflow_runtime = RunConfig.from_fs(workflow.name)
            job_digest = workflow_runtime.digest_jobs[job.name]
            Cache.push_success_record(job.name, job_digest, workflow_runtime.sha)
@classmethod
def configure(cls, _workflow):
    """
    Create the initial workflow report: pending (or skipped, when cached) results
    for all jobs, uploaded to S3, plus a GH commit status and a PR comment that
    link to the report page.

    :param _workflow: workflow config
    :raises RuntimeError: if neither the commit status nor the PR comment could be set
    """
    # generate pending Results for all jobs in the workflow
    if _workflow.enable_cache:
        skip_jobs = RunConfig.from_fs(_workflow.name).cache_success
    else:
        skip_jobs = []

    env = _Environment.get()
    results = []
    for job in _workflow.jobs:
        if job.name not in skip_jobs:
            result = Result.generate_pending(job.name)
        else:
            result = Result.generate_skipped(job.name)
        results.append(result)
    summary_result = Result.generate_pending(_workflow.name, results=results)
    summary_result.aux_links.append(env.CHANGE_URL)
    summary_result.aux_links.append(env.RUN_URL)
    summary_result.start_time = Utils.timestamp()
    page_url = "/".join(
        ["https:/", Settings.HTML_S3_PATH, str(Path(Settings.HTML_PAGE_FILE).name)]
    )
    # replace the bucket name with its http endpoint
    for bucket, endpoint in Settings.S3_BUCKET_TO_HTTP_ENDPOINT.items():
        page_url = page_url.replace(bucket, endpoint)
    # TODO: add support for non-PRs (use branch?)
    page_url += f"?PR={env.PR_NUMBER}&sha=latest&name_0={urllib.parse.quote(env.WORKFLOW_NAME, safe='')}"
    summary_result.html_link = page_url

    # clean the previous latest results in PR if any
    if env.PR_NUMBER:
        S3.clean_latest_result()
    S3.copy_result_to_s3(
        summary_result,
        unlock=False,
    )

    print(f"CI Status page url [{page_url}]")

    res1 = GH.post_commit_status(
        name=_workflow.name,
        status=Result.Status.PENDING,
        description="",
        url=page_url,
    )
    res2 = GH.post_pr_comment(
        comment_body=f"Workflow [[{_workflow.name}]({page_url})], commit [{_Environment.get().SHA[:8]}]",
        or_update_comment_with_substring=f"Workflow [",
    )
    if not (res1 or res2):
        print(
            "ERROR: Failed to set both GH commit status and PR comment with Workflow Status, cannot proceed"
        )
        # a bare `raise` has no active exception here and would only produce
        # "No active exception to reraise" - raise the same type with a real message
        raise RuntimeError(
            "Failed to set both GH commit status and PR comment with Workflow Status"
        )
S3.upload_result_files_to_s3(result) + workflow_result.update_sub_result(result) + + skipped_job_results = [] + if not result.is_ok(): + print( + "Current job failed - find dependee jobs in the workflow and set their statuses to skipped" + ) + workflow_config_parsed = WorkflowConfigParser(_workflow).parse() + for dependee_job in workflow_config_parsed.workflow_yaml_config.jobs: + if _job.name in dependee_job.needs: + if _workflow.get_job(dependee_job.name).run_unless_cancelled: + continue + print( + f"NOTE: Set job [{dependee_job.name}] status to [{Result.Status.SKIPPED}] due to current failure" + ) + skipped_job_results.append( + Result( + name=dependee_job.name, + status=Result.Status.SKIPPED, + info=ResultInfo.SKIPPED_DUE_TO_PREVIOUS_FAILURE + + f" [{_job.name}]", + ) + ) + for skipped_job_result in skipped_job_results: + workflow_result.update_sub_result(skipped_job_result) + + S3.copy_result_to_s3( + workflow_result, + unlock=True, + ) + if workflow_result.status != old_status: + print( + f"Update GH commit status [{result.name}]: [{old_status} -> {workflow_result.status}], link [{workflow_result.html_link}]" + ) + GH.post_commit_status( + name=workflow_result.name, + status=GH.convert_to_gh_status(workflow_result.status), + description="", + url=workflow_result.html_link, + ) diff --git a/ci/praktika/hook_interface.py b/ci/praktika/hook_interface.py new file mode 100644 index 00000000000..762ee62eeb1 --- /dev/null +++ b/ci/praktika/hook_interface.py @@ -0,0 +1,43 @@ +from abc import ABC, abstractmethod + +from praktika import Workflow + + +class HookInterface(ABC): + @abstractmethod + def pre_run(self, _workflow, _job): + """ + runs in pre-run step + :param _workflow: + :param _job: + :return: + """ + pass + + @abstractmethod + def run(self, _workflow, _job): + """ + runs in run step + :param _workflow: + :param _job: + :return: + """ + pass + + @abstractmethod + def post_run(self, _workflow, _job): + """ + runs in post-run step + :param _workflow: + :param 
class Job:
    """Namespace grouping the job configuration dataclasses."""

    @dataclass
    class Requirements:
        """What has to be installed for the job (python and a requirements file)."""

        python: bool = False
        python_requirements_txt: str = ""

    @dataclass
    class CacheDigestConfig:
        """Paths included in / excluded from the job's cache digest."""

        include_paths: List[str] = field(default_factory=list)
        exclude_paths: List[str] = field(default_factory=list)

    @dataclass
    class Config:
        """Configuration of a single job."""

        # Job Name
        name: str

        # Machine's label to run job on. For instance [ubuntu-latest] for free gh runner
        runs_on: List[str]

        # Job Run Command
        command: str

        # What job requires
        # May be phony or physical names
        requires: List[str] = field(default_factory=list)

        # What job provides
        # May be phony or physical names
        provides: List[str] = field(default_factory=list)

        job_requirements: Optional["Job.Requirements"] = None

        timeout: int = 1 * 3600

        digest_config: Optional["Job.CacheDigestConfig"] = None

        run_in_docker: str = ""

        run_unless_cancelled: bool = False

        allow_merge_on_failure: bool = False

        parameter: Any = None

        def parametrize(
            self,
            parameter: Optional[List[Any]] = None,
            runs_on: Optional[List[List[str]]] = None,
            timeout: Optional[List[int]] = None,
        ):
            """
            Produce one deep copy of this config per parametrization entry.

            :param parameter: per-copy parameter values
            :param runs_on: per-copy runner labels
            :param timeout: per-copy timeouts
            :return: list of copies, each renamed via get_job_name_with_parameter()
            """
            assert (
                parameter or runs_on
            ), "Either :parameter or :runs_on must be non empty list for parametrisation"
            # Missing lists are padded with None so that all three line up.
            parameter = parameter or [None] * len(runs_on)
            runs_on = runs_on or [None] * len(parameter)
            timeout = timeout or [None] * len(parameter)
            assert (
                len(parameter) == len(runs_on) == len(timeout)
            ), "Parametrization lists must be of the same size"

            variants = []
            for param_value, labels, limit in zip(parameter, runs_on, timeout):
                variant = copy.deepcopy(self)
                # Falsy entries keep the value inherited from this config.
                if param_value:
                    variant.parameter = param_value
                if labels:
                    variant.runs_on = labels
                if limit:
                    variant.timeout = limit
                variant.name = variant.get_job_name_with_parameter()
                variants.append(variant)
            return variants

        def get_job_name_with_parameter(self):
            """
            Build "<name> (<parameter>, <runs_on>)", store it in self.name and return it.

            List/dict parameters and runs_on are rendered as JSON; the suffix
            is omitted when there is nothing to render.
            """
            suffix_parts = []
            if isinstance(self.parameter, (list, dict)):
                suffix_parts.append(json.dumps(self.parameter))
            elif self.parameter is not None:
                suffix_parts.append(str(self.parameter))
            if self.runs_on:
                assert isinstance(self.runs_on, list)
                suffix_parts.append(json.dumps(self.runs_on))

            full_name = self.name
            if suffix_parts:
                full_name += f" ({', '.join(suffix_parts)})"

            self.name = full_name
            return full_name

        def __repr__(self):
            return self.name
+
+
+ +
+
+
+
+ ☀️ +
+
def _get_workflows(name=None, file=None):
    """
    Gets user's workflow configs

    Loads every "*.py" module from _Settings.WORKFLOWS_DIRECTORY and collects
    the entries of its WORKFLOWS list.

    :param name: if set, only the workflow with this name is returned
    :param file: if set, only modules whose path contains this substring are read
    :return: list of workflow configs, updated with native jobs and artifact info
    """
    res = []

    with ContextManager.cd():
        directory = Path(_Settings.WORKFLOWS_DIRECTORY)
        for py_file in directory.glob("*.py"):
            if file and file not in str(py_file):
                continue
            # Module name is the file name without the ".py" extension
            # (removeprefix(".py") never stripped anything).
            module_name = py_file.stem
            spec = importlib.util.spec_from_file_location(module_name, str(py_file))
            assert spec
            foo = importlib.util.module_from_spec(spec)
            assert spec.loader
            spec.loader.exec_module(foo)
            try:
                if name:
                    for workflow in foo.WORKFLOWS:
                        if name == workflow.name:
                            print(f"Read workflow [{name}] config from [{module_name}]")
                            res = [workflow]
                            break
                else:
                    # Add the module's workflows exactly once; extending inside a
                    # per-workflow loop duplicated every entry.
                    res += foo.WORKFLOWS
                    print(f"Read workflow configs from [{module_name}]")
            except Exception as e:
                print(
                    f"WARNING: Failed to add WORKFLOWS config from [{module_name}], exception [{e}]"
                )
    if not res:
        Utils.raise_with_error(f"Failed to find workflow [{name or file}]")

    for workflow in res:
        # add native jobs
        _update_workflow_with_native_jobs(workflow)
        # fill in artifact properties, e.g. _provided_by
        _update_workflow_artifacts(workflow)
    return res
Path(_Settings.SETTINGS_DIRECTORY) + for py_file in directory.glob("*.py"): + module_name = py_file.name.removeprefix(".py") + spec = importlib.util.spec_from_file_location( + module_name, f"{_Settings.SETTINGS_DIRECTORY}/{module_name}" + ) + assert spec + foo = importlib.util.module_from_spec(spec) + assert spec.loader + spec.loader.exec_module(foo) + for setting in _USER_DEFINED_SETTINGS: + try: + value = getattr(foo, setting) + res[setting] = value + print(f"Apply user defined setting [{setting} = {value}]") + except Exception as e: + pass + + return res diff --git a/ci/praktika/native_jobs.py b/ci/praktika/native_jobs.py new file mode 100644 index 00000000000..f7fd4ca190b --- /dev/null +++ b/ci/praktika/native_jobs.py @@ -0,0 +1,378 @@ +import sys +from typing import Dict + +from praktika import Job, Workflow +from praktika._environment import _Environment +from praktika.cidb import CIDB +from praktika.digest import Digest +from praktika.docker import Docker +from praktika.gh import GH +from praktika.hook_cache import CacheRunnerHooks +from praktika.hook_html import HtmlRunnerHooks +from praktika.mangle import _get_workflows +from praktika.result import Result, ResultInfo +from praktika.runtime import RunConfig +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Shell, Utils + +assert Settings.CI_CONFIG_RUNS_ON + +_workflow_config_job = Job.Config( + name=Settings.CI_CONFIG_JOB_NAME, + runs_on=Settings.CI_CONFIG_RUNS_ON, + job_requirements=( + Job.Requirements( + python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt=Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS, + ) + if Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS + else None + ), + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.CI_CONFIG_JOB_NAME}'", +) + +_docker_build_job = Job.Config( + name=Settings.DOCKER_BUILD_JOB_NAME, + runs_on=Settings.DOCKER_BUILD_RUNS_ON, + job_requirements=Job.Requirements( + 
python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt="", + ), + timeout=4 * 3600, + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.DOCKER_BUILD_JOB_NAME}'", +) + +_final_job = Job.Config( + name=Settings.FINISH_WORKFLOW_JOB_NAME, + runs_on=Settings.CI_CONFIG_RUNS_ON, + job_requirements=Job.Requirements( + python=Settings.INSTALL_PYTHON_FOR_NATIVE_JOBS, + python_requirements_txt="", + ), + command=f"{Settings.PYTHON_INTERPRETER} -m praktika.native_jobs '{Settings.FINISH_WORKFLOW_JOB_NAME}'", + run_unless_cancelled=True, +) + + +def _build_dockers(workflow, job_name): + print(f"Start [{job_name}], workflow [{workflow.name}]") + dockers = workflow.dockers + ready = [] + results = [] + job_status = Result.Status.SUCCESS + job_info = "" + dockers = Docker.sort_in_build_order(dockers) + docker_digests = {} # type: Dict[str, str] + for docker in dockers: + docker_digests[docker.name] = Digest().calc_docker_digest(docker, dockers) + + if not Shell.check( + "docker buildx inspect --bootstrap | grep -q docker-container", verbose=True + ): + print("Install docker container driver") + if not Shell.check( + "docker buildx create --use --name mybuilder --driver docker-container", + verbose=True, + ): + job_status = Result.Status.FAILED + job_info = "Failed to install docker buildx driver" + + if job_status == Result.Status.SUCCESS: + if not Docker.login( + Settings.DOCKERHUB_USERNAME, + user_password=workflow.get_secret(Settings.DOCKERHUB_SECRET).get_value(), + ): + job_status = Result.Status.FAILED + job_info = "Failed to login to dockerhub" + + if job_status == Result.Status.SUCCESS: + for docker in dockers: + assert ( + docker.name not in ready + ), f"All docker names must be uniq [{dockers}]" + stopwatch = Utils.Stopwatch() + info = f"{docker.name}:{docker_digests[docker.name]}" + log_file = f"{Settings.OUTPUT_DIR}/docker_{Utils.normalize_string(docker.name)}.log" + files = [] + + code, out, err = Shell.get_res_stdout_stderr( 
def _config_workflow(workflow: Workflow.Config, job_name):
    """
    Native "workflow config" job.

    Runs the sanity checks (workflow yaml files up to date, secrets readable,
    CI DB reachable when enabled), then prepares the run config: docker
    digests, cache lookup and report initialisation. The outcome is stored in
    the job's Result; the process exits with code 1 if any check failed.

    :param workflow: workflow config the job runs for
    :param job_name: name of this job, used for its Result and info lines
    """

    def _changed_workflow_files():
        # git exits with 0 regardless of differences unless --exit-code is
        # requested, so the list of changed files is what has to be checked.
        # `git diff` is used because it compares file content.
        exit_code, output, _ = Shell.get_res_stdout_stderr(
            f"git diff --name-only HEAD -- {Settings.WORKFLOW_PATH_PREFIX}"
        )
        return exit_code, (output or "").strip()

    def _check_yaml_up_to_date():
        print("Check workflows are up to date")
        stop_watch = Utils.Stopwatch()
        exit_code, output = _changed_workflow_files()
        info = ""
        status = Result.Status.SUCCESS
        if exit_code != 0 or output:
            info = f"workspace has uncommitted files unexpectedly [{output}]"
            status = Result.Status.ERROR
            print("ERROR: ", info)
        else:
            # Regenerate the workflows; any resulting diff means the committed
            # yaml files are stale.
            Shell.check(f"{Settings.PYTHON_INTERPRETER} -m praktika --generate")
            exit_code, output = _changed_workflow_files()
            if exit_code != 0 or output:
                info = f"workspace has outdated workflows [{output}] - regenerate with [python -m praktika --generate]"
                status = Result.Status.ERROR
                print("ERROR: ", info)

        return (
            Result(
                name="Check Workflows updated",
                status=status,
                start_time=stop_watch.start_time,
                duration=stop_watch.duration,
                info=info,
            ),
            info,
        )

    def _check_secrets(secrets):
        print("Check Secrets")
        stop_watch = Utils.Stopwatch()
        infos = []
        for secret_config in secrets:
            value = secret_config.get_value()
            if not value:
                info = f"ERROR: Failed to read secret [{secret_config.name}]"
                infos.append(info)
                print(info)

        info = "\n".join(infos)
        return (
            Result(
                name="Check Secrets",
                status=(Result.Status.FAILED if infos else Result.Status.SUCCESS),
                start_time=stop_watch.start_time,
                duration=stop_watch.duration,
                info=info,
            ),
            info,
        )

    def _check_db(workflow):
        stop_watch = Utils.Stopwatch()
        res, info = CIDB(
            workflow.get_secret(Settings.SECRET_CI_DB_URL).get_value(),
            workflow.get_secret(Settings.SECRET_CI_DB_PASSWORD).get_value(),
        ).check()
        return (
            Result(
                name="Check CI DB",
                status=(Result.Status.FAILED if not res else Result.Status.SUCCESS),
                start_time=stop_watch.start_time,
                duration=stop_watch.duration,
                info=info,
            ),
            info,
        )

    print(f"Start [{job_name}], workflow [{workflow.name}]")
    results = []
    files = []
    info_lines = []
    job_status = Result.Status.SUCCESS

    workflow_config = RunConfig(
        name=workflow.name,
        digest_jobs={},
        digest_dockers={},
        sha=_Environment.get().SHA,
        cache_success=[],
        cache_success_base64=[],
        cache_artifacts={},
    ).dump()

    # checks:
    result_, info = _check_yaml_up_to_date()
    if result_.status != Result.Status.SUCCESS:
        print("ERROR: yaml files are outdated - regenerate, commit and push")
        job_status = Result.Status.ERROR
        info_lines.append(job_name + ": " + info)
    results.append(result_)

    if workflow.secrets:
        result_, info = _check_secrets(workflow.secrets)
        if result_.status != Result.Status.SUCCESS:
            print(f"ERROR: Invalid secrets in workflow [{workflow.name}]")
            job_status = Result.Status.ERROR
            info_lines.append(job_name + ": " + info)
        results.append(result_)

    if workflow.enable_cidb:
        result_, info = _check_db(workflow)
        if result_.status != Result.Status.SUCCESS:
            job_status = Result.Status.ERROR
            info_lines.append(job_name + ": " + info)
        results.append(result_)

    # config:
    if workflow.dockers:
        print("Calculate docker's digests")
        dockers = workflow.dockers
        dockers = Docker.sort_in_build_order(dockers)
        for docker in dockers:
            workflow_config.digest_dockers[docker.name] = Digest().calc_docker_digest(
                docker, dockers
            )
        workflow_config.dump()

    if workflow.enable_cache:
        print("Cache Lookup")
        stop_watch = Utils.Stopwatch()
        workflow_config = CacheRunnerHooks.configure(workflow)
        results.append(
            Result(
                name="Cache Lookup",
                status=Result.Status.SUCCESS,
                start_time=stop_watch.start_time,
                duration=stop_watch.duration,
            )
        )
        files.append(RunConfig.file_name_static(workflow.name))

    workflow_config.dump()

    if workflow.enable_report:
        print("Init report")
        stop_watch = Utils.Stopwatch()
        HtmlRunnerHooks.configure(workflow)
        results.append(
            Result(
                name="Init Report",
                status=Result.Status.SUCCESS,
                start_time=stop_watch.start_time,
                duration=stop_watch.duration,
            )
        )
        files.append(Result.file_name_static(workflow.name))

    Result.from_fs(job_name).set_status(job_status).set_results(results).set_files(
        files
    ).set_info("\n".join(info_lines))

    if job_status != Result.Status.SUCCESS:
        sys.exit(1)
job_name or result.status in ( + Result.Status.SUCCESS, + Result.Status.SKIPPED, + ): + continue + if not result.is_completed(): + print( + f"ERROR: not finished job [{result.name}] in the workflow - set status to error" + ) + result.status = Result.Status.ERROR + # dump workflow result after update - to have an updated result in post + workflow_result.dump() + # add error into env - should apper in the report + env.add_info(ResultInfo.NOT_FINALIZED + f" [{result.name}]") + update_final_report = True + job = workflow.get_job(result.name) + if not job or not job.allow_merge_on_failure: + print( + f"NOTE: Result for [{result.name}] has not ok status [{result.status}]" + ) + ready_for_merge_status = Result.Status.FAILED + failed_results.append(result.name.split("(", maxsplit=1)[0]) # cut name + + if failed_results: + ready_for_merge_description = f"failed: {', '.join(failed_results)}" + + if not GH.post_commit_status( + name=Settings.READY_FOR_MERGE_STATUS_NAME + f" [{workflow.name}]", + status=ready_for_merge_status, + description=ready_for_merge_description, + url="", + ): + print(f"ERROR: failed to set status [{Settings.READY_FOR_MERGE_STATUS_NAME}]") + env.add_info(ResultInfo.GH_STATUS_ERROR) + + if update_final_report: + S3.copy_result_to_s3( + workflow_result, + unlock=False, + ) # no lock - no unlock + + Result.from_fs(job_name).set_status(Result.Status.SUCCESS).set_info( + ready_for_merge_description + ) + + +if __name__ == "__main__": + job_name = sys.argv[1] + assert job_name, "Job name must be provided as input argument" + workflow = _get_workflows(name=_Environment.get().WORKFLOW_NAME)[0] + if job_name == Settings.DOCKER_BUILD_JOB_NAME: + _build_dockers(workflow, job_name) + elif job_name == Settings.CI_CONFIG_JOB_NAME: + _config_workflow(workflow, job_name) + elif job_name == Settings.FINISH_WORKFLOW_JOB_NAME: + _finish_workflow(workflow, job_name) + else: + assert False, f"BUG, job name [{job_name}]" diff --git a/ci/praktika/parser.py 
b/ci/praktika/parser.py new file mode 100644 index 00000000000..95aa27c4576 --- /dev/null +++ b/ci/praktika/parser.py @@ -0,0 +1,258 @@ +import dataclasses +from typing import Any, Dict, List + +from praktika import Artifact, Workflow +from praktika.mangle import _get_workflows + + +class AddonType: + PY = "py" + + +@dataclasses.dataclass +class WorkflowYaml: + @dataclasses.dataclass + class JobYaml: + name: str + needs: List[str] + runs_on: List[str] + artifacts_gh_requires: List["WorkflowYaml.ArtifactYaml"] + artifacts_gh_provides: List["WorkflowYaml.ArtifactYaml"] + addons: List["WorkflowYaml.JobAddonYaml"] + gh_app_auth: bool + run_unless_cancelled: bool + parameter: Any + + def __repr__(self): + return self.name + + @dataclasses.dataclass + class ArtifactYaml: + name: str + provided_by: str + required_by: List[str] + path: str + type: str + + def __repr__(self): + return self.name + + @dataclasses.dataclass + class JobAddonYaml: + install_python: bool + requirements_txt_path: str + + name: str + event: str + branches: List[str] + jobs: List[JobYaml] + job_to_config: Dict[str, JobYaml] + artifact_to_config: Dict[str, ArtifactYaml] + secret_names_gh: List[str] + enable_cache: bool + + +class WorkflowConfigParser: + def __init__(self, config: Workflow.Config): + self.workflow_name = config.name + self.config = config + self.requires_all = [] # type: List[str] + self.provides_all = [] # type: List[str] + self.job_names_all = [] # type: List[str] + self.artifact_to_providing_job_map = {} # type: Dict[str, List[str]] + self.artifact_to_job_requires_map = {} # type: Dict[str, List[str]] + self.artifact_map = {} # type: Dict[str, List[Artifact.Config]] + + self.job_to_provides_artifacts = {} # type: Dict[str, List[Artifact.Config]] + self.job_to_requires_artifacts = {} # type: Dict[str, List[Artifact.Config]] + + self.workflow_yaml_config = WorkflowYaml( + name=self.workflow_name, + event=config.event, + branches=[], + jobs=[], + secret_names_gh=[], + 
job_to_config={}, + artifact_to_config={}, + enable_cache=False, + ) + + def parse(self): + self.workflow_yaml_config.enable_cache = self.config.enable_cache + + # populate WorkflowYaml.branches + if self.config.event in (Workflow.Event.PUSH,): + assert ( + self.config.branches + ), f'Workflow.Config.branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert ( + not self.config.base_branches + ), f'Workflow.Config.base_branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert isinstance( + self.config.branches, list + ), f'Workflow.Config.branches must be of type list (e.g. ["main"]), workflow [{self.workflow_name}]' + self.workflow_yaml_config.branches = self.config.branches + elif self.config.event in (Workflow.Event.PULL_REQUEST,): + assert ( + self.config.base_branches + ), f'Workflow.Config.base_branches (e.g. ["main"]) must be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert ( + not self.config.branches + ), f'Workflow.Config.branches (e.g. ["main"]) must not be set for workflow with event [{self.config.event}], workflow [{self.workflow_name}]' + assert isinstance( + self.config.base_branches, list + ), f'Workflow.Config.base_branches must be of type list (e.g. 
["main"]), workflow [{self.workflow_name}]' + self.workflow_yaml_config.branches = self.config.base_branches + + # populate WorkflowYaml.artifact_to_config with phony artifacts + for job in self.config.jobs: + assert ( + job.name not in self.workflow_yaml_config.artifact_to_config + ), f"Not uniq Job name [{job.name}], workflow [{self.workflow_name}]" + self.workflow_yaml_config.artifact_to_config[job.name] = ( + WorkflowYaml.ArtifactYaml( + name=job.name, + provided_by=job.name, + required_by=[], + path="", + type=Artifact.Type.PHONY, + ) + ) + + # populate jobs + for job in self.config.jobs: + job_yaml_config = WorkflowYaml.JobYaml( + name=job.name, + addons=[], + artifacts_gh_requires=[], + artifacts_gh_provides=[], + needs=[], + runs_on=[], + gh_app_auth=False, + run_unless_cancelled=job.run_unless_cancelled, + parameter=None, + ) + self.workflow_yaml_config.jobs.append(job_yaml_config) + assert ( + job.name not in self.workflow_yaml_config.job_to_config + ), f"Job name [{job.name}] is not uniq, workflow [{self.workflow_name}]" + self.workflow_yaml_config.job_to_config[job.name] = job_yaml_config + + # populate WorkflowYaml.artifact_to_config + if self.config.artifacts: + for artifact in self.config.artifacts: + assert ( + artifact.name not in self.workflow_yaml_config.artifact_to_config + ), f"Artifact name [{artifact.name}] is not uniq, workflow [{self.workflow_name}]" + artifact_yaml_config = WorkflowYaml.ArtifactYaml( + name=artifact.name, + provided_by="", + required_by=[], + path=artifact.path, + type=artifact.type, + ) + self.workflow_yaml_config.artifact_to_config[artifact.name] = ( + artifact_yaml_config + ) + + # populate ArtifactYaml.provided_by + for job in self.config.jobs: + if job.provides: + for artifact_name in job.provides: + assert ( + artifact_name in self.workflow_yaml_config.artifact_to_config + ), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]" + assert not 
self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by, f"Artifact [{artifact_name}] provided by multiple jobs [{self.workflow_yaml_config.artifact_to_config[artifact_name].provided_by}] and [{job.name}]" + self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by = job.name + + # populate ArtifactYaml.required_by + for job in self.config.jobs: + if job.requires: + for artifact_name in job.requires: + assert ( + artifact_name in self.workflow_yaml_config.artifact_to_config + ), f"Artifact [{artifact_name}] has no config, job [{job.name}], workflow [{self.workflow_name}]" + assert self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].provided_by, f"Artifact [{artifact_name}] has no job providing it, required by job [{job.name}], workflow [{self.workflow_name}]" + self.workflow_yaml_config.artifact_to_config[ + artifact_name + ].required_by.append(job.name) + + # populate JobYaml.addons + for job in self.config.jobs: + if job.job_requirements: + addon_yaml = WorkflowYaml.JobAddonYaml( + requirements_txt_path=job.job_requirements.python_requirements_txt, + install_python=job.job_requirements.python, + ) + self.workflow_yaml_config.job_to_config[job.name].addons.append( + addon_yaml + ) + + if self.config.enable_report: + for job in self.config.jobs: + # auth required for every job with enabled HTML, so that workflow summary status can be updated + self.workflow_yaml_config.job_to_config[job.name].gh_app_auth = True + + # populate JobYaml.runs_on + for job in self.config.jobs: + self.workflow_yaml_config.job_to_config[job.name].runs_on = job.runs_on + + # populate JobYaml.artifacts_gh_requires, JobYaml.artifacts_gh_provides and JobYaml.needs + for ( + artifact_name, + artifact, + ) in self.workflow_yaml_config.artifact_to_config.items(): + # assert ( + # artifact.provided_by + # and artifact.provided_by in self.workflow_yaml_config.job_to_config + # ), f"Artifact [{artifact_name}] has no valid job providing it 
[{artifact.provided_by}]" + for job_name in artifact.required_by: + if ( + artifact.provided_by + not in self.workflow_yaml_config.job_to_config[job_name].needs + ): + self.workflow_yaml_config.job_to_config[job_name].needs.append( + artifact.provided_by + ) + if artifact.type in (Artifact.Type.GH,): + self.workflow_yaml_config.job_to_config[ + job_name + ].artifacts_gh_requires.append(artifact) + elif artifact.type in (Artifact.Type.PHONY, Artifact.Type.S3): + pass + else: + assert ( + False + ), f"Artifact [{artifact_name}] has unsupported type [{artifact.type}]" + if not artifact.required_by and artifact.type != Artifact.Type.PHONY: + print( + f"WARNING: Artifact [{artifact_name}] provided by job [{artifact.provided_by}] not required by any job in workflow [{self.workflow_name}]" + ) + if artifact.type == Artifact.Type.GH: + self.workflow_yaml_config.job_to_config[ + artifact.provided_by + ].artifacts_gh_provides.append(artifact) + + # populate JobYaml.parametrize + for job in self.config.jobs: + self.workflow_yaml_config.job_to_config[job.name].parameter = job.parameter + + # populate secrets + for secret_config in self.config.secrets: + if secret_config.is_gh(): + self.workflow_yaml_config.secret_names_gh.append(secret_config.name) + + return self + + +if __name__ == "__main__": + # test + workflows = _get_workflows() + for workflow in workflows: + WorkflowConfigParser(workflow).parse() diff --git a/ci/praktika/result.py b/ci/praktika/result.py new file mode 100644 index 00000000000..3d3c986d5f9 --- /dev/null +++ b/ci/praktika/result.py @@ -0,0 +1,354 @@ +import dataclasses +import datetime +import sys +from collections.abc import Container +from pathlib import Path +from typing import Any, Dict, List, Optional + +from praktika._environment import _Environment +from praktika._settings import _Settings +from praktika.utils import ContextManager, MetaClasses, Shell, Utils + + +@dataclasses.dataclass +class Result(MetaClasses.Serializable): + """ + Represents the 
outcome of a workflow/job/task or any operation, along with associated metadata. + + This class supports nesting of results to represent tasks with sub-tasks, and includes + various attributes to track status, timing, files, and links. + + Attributes: + name (str): The name of the task. + status (str): The current status of the task. Should be one of the values defined in the Status class. + start_time (Optional[float]): The start time of the task in Unix timestamp format. None if not started. + duration (Optional[float]): The duration of the task in seconds. None if not completed. + results (List[Result]): A list of sub-results representing nested tasks. + files (List[str]): A list of file paths or names related to the result. + links (List[str]): A list of URLs related to the result (e.g., links to reports or resources). + info (str): Additional information about the result. Free-form text. + # TODO: rename + aux_links (List[str]): A list of auxiliary links that provide additional context for the result. + # TODO: remove + html_link (str): A direct link to an HTML representation of the result (e.g., a detailed report page). + + Inner Class: + Status: Defines possible statuses for the task, such as "success", "failure", etc. 
@staticmethod
def create_from(
    name="",
    results: List["Result"] = None,
    stopwatch: Utils.Stopwatch = None,
    status="",
    files=None,
    info="",
    with_info_from_results=True,
):
    """
    Build a Result from sub-results and/or an explicit status.

    :param name: result name; defaults to the current job name from the environment
    :param results: sub-results; when no status is given the overall status is
        FAILED if any of them is not SUCCESS
    :param stopwatch: optional stopwatch providing start_time and duration
    :param status: explicit status string, or a bool (True -> SUCCESS, False -> FAILED)
    :param files: files attached to the result
    :param info: a single info string or a container of info strings
    :param with_info_from_results: also append "<name>: <info>" of each sub-result
    :return: the new Result
    :raises RuntimeError: if neither results nor status is given, or no name
        can be determined
    """
    if isinstance(status, bool):
        status = Result.Status.SUCCESS if status else Result.Status.FAILED
    if not results and not status:
        message = "ERROR: Either .results or .status must be provided"
        print(message)
        # A bare `raise` has no active exception here; raise explicitly.
        raise RuntimeError(message)
    if not name:
        name = _Environment.get().JOB_NAME
        if not name:
            message = "ERROR: Failed to guess the .name"
            print(message)
            raise RuntimeError(message)
    result_status = status or Result.Status.SUCCESS
    infos = []
    if info:
        # str is itself a Container: extending the list with it would add
        # one entry per character, so it is handled before the generic case.
        if isinstance(info, str):
            infos.append(info)
        elif isinstance(info, Container):
            infos += info
        else:
            infos.append(info)
    if results and not status:
        for result in results:
            if result.status not in (Result.Status.SUCCESS, Result.Status.FAILED):
                Utils.raise_with_error(
                    f"Unexpected result status [{result.status}] for Result.create_from call"
                )
            if result.status != Result.Status.SUCCESS:
                result_status = Result.Status.FAILED
    if results:
        for result in results:
            if result.info and with_info_from_results:
                infos.append(f"{result.name}: {result.info}")
    return Result(
        name=name,
        status=result_status,
        start_time=stopwatch.start_time if stopwatch else None,
        duration=stopwatch.duration if stopwatch else None,
        info="\n".join(infos) if infos else "",
        results=results or [],
        files=files or [],
    )
def is_running(self):
    """
    Tell whether the result is currently in the RUNNING state.

    :return: True only when status equals Status.RUNNING (the previous
        implementation returned the opposite)
    """
    return self.status == self.Status.RUNNING
+ for i, result_ in enumerate(self.results): + if result_.name == result.name: + self.results[i] = result + self._update_status() + return self + + def _update_status(self): + was_pending = False + was_running = False + if self.status == self.Status.PENDING: + was_pending = True + if self.status == self.Status.RUNNING: + was_running = True + + has_pending, has_running, has_failed = False, False, False + for result_ in self.results: + if result_.status in (self.Status.RUNNING,): + has_running = True + if result_.status in (self.Status.PENDING,): + has_pending = True + if result_.status in (self.Status.ERROR, self.Status.FAILED): + has_failed = True + if has_running: + self.status = self.Status.RUNNING + elif has_pending: + self.status = self.Status.PENDING + elif has_failed: + self.status = self.Status.FAILED + else: + self.status = self.Status.SUCCESS + if (was_pending or was_running) and self.status not in ( + self.Status.PENDING, + self.Status.RUNNING, + ): + print("Pipeline finished") + self.update_duration() + + @classmethod + def generate_pending(cls, name, results=None): + return Result( + name=name, + status=Result.Status.PENDING, + start_time=None, + duration=None, + results=results or [], + files=[], + links=[], + info="", + ) + + @classmethod + def generate_skipped(cls, name, results=None): + return Result( + name=name, + status=Result.Status.SKIPPED, + start_time=None, + duration=None, + results=results or [], + files=[], + links=[], + info="from cache", + ) + + @classmethod + def create_from_command_execution( + cls, + name, + command, + with_log=False, + fail_fast=True, + workdir=None, + command_args=None, + command_kwargs=None, + ): + """ + Executes shell commands or Python callables, optionally logging output, and handles errors. + + :param name: Check name + :param command: Shell command (str) or Python callable, or list of them. + :param workdir: Optional working directory. + :param with_log: Boolean flag to log output to a file. 
def finish_job_accordingly(self):
    """
    Persist the result and terminate the job script according to its status.

    The result is dumped first. If it is not ok, the failed sub-results are
    listed under a single "Failed checks:" header (previously the header was
    repeated for every failed sub-result) and the process exits with code 1.
    Otherwise "ok" is printed.
    """
    self.dump()
    if self.is_ok():
        print("ok")
        return

    print("ERROR: Job Failed")
    failed = [result for result in self.results if not result.is_ok()]
    if failed:
        print("Failed checks:")
        for result in failed:
            print(" | ", result)
    sys.exit(1)
set up job env, it's praktika bug or misconfiguration" + ) + PRE_JOB_FAILED = ( + "Failed to do a job pre-run step, it's praktika bug or misconfiguration" + ) + KILLED = "Job killed or terminated, no Result provided" + NOT_FOUND_IMPOSSIBLE = ( + "No Result file (bug, or job misbehaviour, must not ever happen)" + ) + SKIPPED_DUE_TO_PREVIOUS_FAILURE = "Skipped due to previous failure" + TIMEOUT = "Timeout" + + GH_STATUS_ERROR = "Failed to set GH commit status" + + NOT_FINALIZED = ( + "Job did not not provide Result: job script bug, died CI runner or praktika bug" + ) + + S3_ERROR = "S3 call failure" diff --git a/ci/praktika/runner.py b/ci/praktika/runner.py new file mode 100644 index 00000000000..15e759397ec --- /dev/null +++ b/ci/praktika/runner.py @@ -0,0 +1,348 @@ +import os +import re +import sys +import traceback +from pathlib import Path + +from praktika._environment import _Environment +from praktika.artifact import Artifact +from praktika.cidb import CIDB +from praktika.digest import Digest +from praktika.hook_cache import CacheRunnerHooks +from praktika.hook_html import HtmlRunnerHooks +from praktika.result import Result, ResultInfo +from praktika.runtime import RunConfig +from praktika.s3 import S3 +from praktika.settings import Settings +from praktika.utils import Shell, TeePopen, Utils + + +class Runner: + @staticmethod + def generate_dummy_environment(workflow, job): + print("WARNING: Generate dummy env for local test") + Shell.check( + f"mkdir -p {Settings.TEMP_DIR} {Settings.INPUT_DIR} {Settings.OUTPUT_DIR}" + ) + _Environment( + WORKFLOW_NAME=workflow.name, + JOB_NAME=job.name, + REPOSITORY="", + BRANCH="", + SHA="", + PR_NUMBER=-1, + EVENT_TYPE="", + JOB_OUTPUT_STREAM="", + EVENT_FILE_PATH="", + CHANGE_URL="", + COMMIT_URL="", + BASE_BRANCH="", + RUN_URL="", + RUN_ID="", + INSTANCE_ID="", + INSTANCE_TYPE="", + INSTANCE_LIFE_CYCLE="", + ).dump() + workflow_config = RunConfig( + name=workflow.name, + digest_jobs={}, + digest_dockers={}, + sha="", + 
def _setup_env(self, _workflow, job):
    """
    Prepare the job environment from the env setup script.

    The script is first sourced in a shell (strict mode), then parsed from
    Python so that every heredoc-style `export NAME=$(cat<<'EOF' ... EOF)`
    value is also placed into this process's os.environ. Finally the
    _Environment is read from the process environment, tagged with the job
    name and parameter, and dumped.

    :param _workflow: Unused.
    :param job: Job whose name and parameter are stored in the environment.
    :return: Always 0.
    """
    # Source the env file so it writes its data to the filesystem
    # (workflow config json, workflow status json).
    Shell.check(f". {Settings.ENV_SETUP_SCRIPT}", verbose=True, strict=True)

    # Re-parse the same script: variables exported in the sub-shell above are
    # not visible to this process, so they are applied here explicitly.
    with open(Settings.ENV_SETUP_SCRIPT, "r") as script:
        script_text = script.read()
    heredoc_export = re.compile(
        r"export (\w+)=\$\(cat<<\'EOF\'\n(.*?)EOF\n\)", re.DOTALL
    )
    for found in heredoc_export.finditer(script_text):
        var_name, raw_value = found.groups()
        os.environ[var_name] = raw_value.strip()
        print(f"Set environment variable {var_name}.")

    # TODO: remove
    os.environ["PYTHONPATH"] = os.getcwd()

    print("Read GH Environment")
    environment = _Environment.from_env()
    environment.JOB_NAME = job.name
    environment.PARAMETER = job.parameter
    environment.dump()
    print(environment)

    return 0
_required_artifacts=required_artifacts + ) + else: + prefixes = [env.get_s3_prefix()] * len(required_artifacts) + for artifact, prefix in zip(required_artifacts, prefixes): + s3_path = f"{Settings.S3_ARTIFACT_PATH}/{prefix}/{Utils.normalize_string(artifact._provided_by)}/{Path(artifact.path).name}" + assert S3.copy_file_from_s3(s3_path=s3_path, local_path=Settings.INPUT_DIR) + + return 0 + + def _run(self, workflow, job, docker="", no_docker=False, param=None): + if param: + if not isinstance(param, str): + Utils.raise_with_error( + f"Custom param for local tests must be of type str, got [{type(param)}]" + ) + env = _Environment.get() + env.LOCAL_RUN_PARAM = param + env.dump() + print(f"Custom param for local tests [{param}] dumped into Environment") + + if job.run_in_docker and not no_docker: + # TODO: add support for any image, including not from ci config (e.g. ubuntu:latest) + docker_tag = RunConfig.from_fs(workflow.name).digest_dockers[ + job.run_in_docker + ] + docker = docker or f"{job.run_in_docker}:{docker_tag}" + cmd = f"docker run --rm --user \"$(id -u):$(id -g)\" -e PYTHONPATH='{Settings.DOCKER_WD}' --volume ./:{Settings.DOCKER_WD} --volume {Settings.TEMP_DIR}:{Settings.TEMP_DIR} --workdir={Settings.DOCKER_WD} {docker} {job.command}" + else: + cmd = job.command + print(f"--- Run command [{cmd}]") + + with TeePopen(cmd, timeout=job.timeout) as process: + exit_code = process.wait() + + result = Result.from_fs(job.name) + if exit_code != 0: + if not result.is_completed(): + if process.timeout_exceeded: + print( + f"WARNING: Job timed out: [{job.name}], timeout [{job.timeout}], exit code [{exit_code}]" + ) + result.set_status(Result.Status.ERROR).set_info( + ResultInfo.TIMEOUT + ) + elif result.is_running(): + info = f"ERROR: Job terminated with an error, exit code [{exit_code}] - set status to [{Result.Status.ERROR}]" + print(info) + result.set_status(Result.Status.ERROR).set_info(info) + else: + info = f"ERROR: Invalid status [{result.status}] for exit 
code [{exit_code}] - switch to [{Result.Status.ERROR}]" + print(info) + result.set_status(Result.Status.ERROR).set_info(info) + result.dump() + + return exit_code + + def _post_run( + self, workflow, job, setup_env_exit_code, prerun_exit_code, run_exit_code + ): + info_errors = [] + env = _Environment.get() + result_exist = Result.exist(job.name) + + if setup_env_exit_code != 0: + info = f"ERROR: {ResultInfo.SETUP_ENV_JOB_FAILED}" + print(info) + # set Result with error and logs + Result( + name=job.name, + status=Result.Status.ERROR, + start_time=Utils.timestamp(), + duration=0.0, + info=info, + ).dump() + elif prerun_exit_code != 0: + info = f"ERROR: {ResultInfo.PRE_JOB_FAILED}" + print(info) + # set Result with error and logs + Result( + name=job.name, + status=Result.Status.ERROR, + start_time=Utils.timestamp(), + duration=0.0, + info=info, + ).dump() + elif not result_exist: + info = f"ERROR: {ResultInfo.NOT_FOUND_IMPOSSIBLE}" + print(info) + Result( + name=job.name, + start_time=Utils.timestamp(), + duration=None, + status=Result.Status.ERROR, + info=ResultInfo.NOT_FOUND_IMPOSSIBLE, + ).dump() + + result = Result.from_fs(job.name) + + if not result.is_completed(): + info = f"ERROR: {ResultInfo.KILLED}" + print(info) + result.set_info(info).set_status(Result.Status.ERROR).dump() + + result.set_files(files=[Settings.RUN_LOG]) + result.update_duration().dump() + + if result.info and result.status != Result.Status.SUCCESS: + # provide job info to workflow level + info_errors.append(result.info) + + if run_exit_code == 0: + providing_artifacts = [] + if job.provides and workflow.artifacts: + for provides_artifact_name in job.provides: + for artifact in workflow.artifacts: + if ( + artifact.name == provides_artifact_name + and artifact.type == Artifact.Type.S3 + ): + providing_artifacts.append(artifact) + if providing_artifacts: + print(f"Job provides s3 artifacts [{providing_artifacts}]") + for artifact in providing_artifacts: + try: + assert Shell.check( + f"ls 
def run(
    self, workflow, job, docker="", dummy_env=False, no_docker=False, param=None
):
    """
    Execute the full job lifecycle: setup env -> pre-run -> run -> post-run.

    Each stage runs only if the previous one succeeded. A stage fails when it
    returns a non-zero code or raises. The setup stage now also counts an
    exception as a failure; previously an exception left the success flag set
    and the following stages still ran. Post-run is always executed unless
    dummy_env is set. Exit codes of stages that did not run stay at -10.

    :param workflow: Workflow the job belongs to.
    :param job: Job to run.
    :param docker: Optional docker image override passed to the run stage.
    :param dummy_env: Generate a dummy environment (local test) instead of
        running the setup, pre-run and post-run stages.
    :param no_docker: Passed to the run stage to bypass docker.
    :param param: Optional custom parameter passed to the run stage.
    :raises SystemExit: with code 1 when any executed stage failed.
    """
    res = True
    setup_env_code = -10
    prerun_code = -10
    run_code = -10

    if res and not dummy_env:
        # Pessimistic default, same as the other stages: an exception below
        # must count as a failed setup.
        res = False
        print(
            f"\n\n=== Setup env script [{job.name}], workflow [{workflow.name}] ==="
        )
        try:
            setup_env_code = self._setup_env(workflow, job)
            # Source the bash script and capture the environment variables
            res = setup_env_code == 0
            if not res:
                print(
                    f"ERROR: Setup env script failed with exit code [{setup_env_code}]"
                )
        except Exception as e:
            print(f"ERROR: Setup env script failed with exception [{e}]")
            traceback.print_exc()
        print(f"=== Setup env finished ===\n\n")
    else:
        self.generate_dummy_environment(workflow, job)

    if res and not dummy_env:
        res = False
        print(f"=== Pre run script [{job.name}], workflow [{workflow.name}] ===")
        try:
            prerun_code = self._pre_run(workflow, job)
            res = prerun_code == 0
            if not res:
                print(f"ERROR: Pre-run failed with exit code [{prerun_code}]")
        except Exception as e:
            print(f"ERROR: Pre-run script failed with exception [{e}]")
            traceback.print_exc()
        print(f"=== Pre run finished ===\n\n")

    if res:
        res = False
        print(f"=== Run script [{job.name}], workflow [{workflow.name}] ===")
        try:
            run_code = self._run(
                workflow, job, docker=docker, no_docker=no_docker, param=param
            )
            res = run_code == 0
            if not res:
                print(f"ERROR: Run failed with exit code [{run_code}]")
        except Exception as e:
            print(f"ERROR: Run script failed with exception [{e}]")
            traceback.print_exc()
        print(f"=== Run scrip finished ===\n\n")

    if not dummy_env:
        print(f"=== Post run script [{job.name}], workflow [{workflow.name}] ===")
        self._post_run(workflow, job, setup_env_code, prerun_code, run_code)
        print(f"=== Post run scrip finished ===")

    if not res:
        sys.exit(1)
@dataclasses.dataclass
class Object:
    """
    Attributes of an S3 object, built from the JSON printed by
    `aws s3api head-object` (field names match the JSON keys).
    """

    AcceptRanges: str
    Expiration: str
    LastModified: str
    ContentLength: int
    ETag: str
    ContentType: str
    ServerSideEncryption: str
    Metadata: Dict

    def has_tags(self, tags):
        """
        Check that every key/value pair of `tags` is present in Metadata.

        The first mismatching pair is printed.

        :param tags: Mapping of expected metadata entries.
        :return: True when all pairs match (also for empty `tags`), else False.
        """
        stored = self.Metadata
        for tag_name, expected in tags.items():
            if tag_name in stored and not stored[tag_name] != expected:
                continue
            print(f"tag [{tag_name}={expected}] does not match meta [{stored}]")
            return False
        return True
@classmethod
def put(cls, s3_path, local_path, text=False, metadata=None):
    """
    Upload a single local file with `aws s3api put-object`.

    :param s3_path: Destination as "bucket/key..." (an "s3://" prefix is
        accepted). The local file name is appended unless the path already
        ends with it.
    :param local_path: Existing local file to upload.
    :param text: When True the object is stored with content type text/plain.
        Previously this flag only affected a command string that was never
        executed, so it had no effect.
    :param metadata: Optional dict of user metadata. All entries are passed
        as one `--metadata k1=v1,k2=v2` map instead of repeating the option
        per key.
    :raises AssertionError: if the local file is missing or not a regular
        file, or if the upload command fails.
    """
    assert Path(local_path).exists(), f"Path [{local_path}] does not exist"
    assert Path(s3_path), f"Invalid S3 Path [{s3_path}]"
    assert Path(
        local_path
    ).is_file(), f"Path [{local_path}] is not file. Only files are supported"
    file_name = Path(local_path).name
    s3_full_path = s3_path
    if not s3_full_path.endswith(file_name):
        s3_full_path = f"{s3_path}/{file_name}"

    s3_full_path = str(s3_full_path).removeprefix("s3://")
    bucket, key = s3_full_path.split("/", maxsplit=1)

    command = (
        f"aws s3api put-object --bucket {bucket} --key {key} --body {local_path}"
    )
    if metadata:
        pairs = ",".join(f"{k}={v}" for k, v in metadata.items())
        command += f" --metadata {pairs}"
    if text:
        command += " --content-type text/plain"

    res = cls.run_command_with_retries(command)
    assert res, f"Failed to put [{local_path}] to [s3://{s3_full_path}]"
@classmethod
def get_link(cls, s3_path, local_path):
    """
    Build the HTTPS link a file would have after being copied to `s3_path`.

    :param s3_path: Destination prefix as "bucket/key...".
    :param local_path: Local file; only its name is used.
    :return: "https://<endpoint>/<key...>/<file name>", where the endpoint is
        looked up in Settings.S3_BUCKET_TO_HTTP_ENDPOINT by bucket name.
    :raises KeyError: if the bucket has no configured endpoint.
    """
    s3_full_path = f"{s3_path}/{Path(local_path).name}"
    bucket = s3_path.split("/")[0]
    endpoint = Settings.S3_BUCKET_TO_HTTP_ENDPOINT[bucket]
    # Swap only the leading bucket component. A plain str.replace() would also
    # rewrite the bucket name wherever it appears in the key or file name.
    return f"https://{endpoint}{s3_full_path[len(bucket):]}"
@classmethod
def lock(cls, s3_path, level=0):
    """
    Acquire a lock for `s3_path` by creating "<s3_path>.lock" in S3.

    While a lock object exists, it is polled every 5 seconds for a limited
    time; if it is still there afterwards, the lock is taken over forcefully
    and a note is added to the environment info. The lock object is then
    written with this job's name (base64) as metadata and read back to verify
    ownership; on mismatch the whole procedure is retried.

    :param s3_path: S3 path ("bucket/key...") of the object to lock.
    :param level: Retry depth, used internally for the recursive retry.
    :raises AssertionError: after three attempts, or if the local lock file
        cannot be written.
    """
    assert level < 3, "Never"
    env = _Environment.get()
    s3_path_lock = s3_path + f".lock"
    file_path_lock = f"{Settings.TEMP_DIR}/{Path(s3_path_lock).name}"
    assert Shell.check(
        f"echo '''{env.JOB_NAME}''' > {file_path_lock}", verbose=True
    ), "Never"

    wait_budget = 20
    meta = S3.head_object(s3_path_lock)
    while meta:
        print(f"WARNING: Failed to acquire lock, meta [{meta}] - wait")
        wait_budget -= 5
        if wait_budget < 0:
            info = f"ERROR: lock acquire failure - unlock forcefully"
            print(info)
            env.add_info(info)
            break
        time.sleep(5)
        # Re-read the lock object. Without this the loop could never see the
        # lock being released and always ended in a forced takeover.
        meta = S3.head_object(s3_path_lock)

    metadata = {"job": Utils.to_base64(env.JOB_NAME)}
    S3.put(
        s3_path=s3_path_lock,
        local_path=file_path_lock,
        metadata=metadata,
    )
    time.sleep(1)
    obj = S3.head_object(s3_path_lock)
    if not obj or not obj.has_tags(tags=metadata):
        print(f"WARNING: locked by another job [{obj}]")
        env.add_info("S3 lock file failure")
        cls.lock(s3_path, level=level + 1)
        # The successful retry has already reported the acquisition.
        return
    print("INFO: lock acquired")
result): + env = _Environment.get() + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix(latest=True if env.PR_NUMBER else False)}" + return S3.get_link(s3_path=s3_path, local_path=result.file_name()) + + @classmethod + def clean_latest_result(cls): + env = _Environment.get() + env.SHA = "latest" + assert env.PR_NUMBER + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}" + S3.clean_s3_directory(s3_path=s3_path) + + @classmethod + def _upload_file_to_s3( + cls, local_file_path, upload_to_s3: bool, text: bool = False, s3_subprefix="" + ) -> str: + if upload_to_s3: + env = _Environment.get() + s3_path = f"{Settings.HTML_S3_PATH}/{env.get_s3_prefix()}" + if s3_subprefix: + s3_subprefix.removeprefix("/").removesuffix("/") + s3_path += f"/{s3_subprefix}" + html_link = S3.copy_file_to_s3( + s3_path=s3_path, local_path=local_file_path, text=text + ) + return html_link + return f"file://{Path(local_file_path).absolute()}" + + @classmethod + def upload_result_files_to_s3(cls, result): + if result.results: + for result_ in result.results: + cls.upload_result_files_to_s3(result_) + for file in result.files: + if not Path(file).is_file(): + print(f"ERROR: Invalid file [{file}] in [{result.name}] - skip upload") + result.info += f"\nWARNING: Result file [{file}] was not found" + file_link = cls._upload_file_to_s3(file, upload_to_s3=False) + else: + is_text = False + for text_file_suffix in Settings.TEXT_CONTENT_EXTENSIONS: + if file.endswith(text_file_suffix): + print( + f"File [{file}] matches Settings.TEXT_CONTENT_EXTENSIONS [{Settings.TEXT_CONTENT_EXTENSIONS}] - add text attribute for s3 object" + ) + is_text = True + break + file_link = cls._upload_file_to_s3( + file, + upload_to_s3=True, + text=is_text, + s3_subprefix=Utils.normalize_string(result.name), + ) + result.links.append(file_link) + if result.files: + print( + f"Result files [{result.files}] uploaded to s3 [{result.links[-len(result.files):]}] - clean files list" + ) + result.files = [] + result.dump() 
class Secret:
    """Namespace for secret definitions and the backends they are read from."""

    class Type:
        # Supported secret backends.
        AWS_SSM_VAR = "aws parameter"
        AWS_SSM_SECRET = "aws secret"
        GH_SECRET = "gh secret"

    @dataclasses.dataclass
    class Config:
        """A named secret together with the backend (`type`) it lives in."""

        name: str
        type: str

        def is_gh(self):
            """Return True when the secret is a GitHub secret."""
            return Secret.Type.GH_SECRET == self.type

        def get_value(self):
            """
            Fetch the secret value from the backend selected by `type`.

            :raises RuntimeError: if the backend returns an empty value.
            :raises AssertionError: for an unsupported secret type.
            """
            kind = self.type
            if kind == Secret.Type.AWS_SSM_VAR:
                return self.get_aws_ssm_var()
            if kind == Secret.Type.AWS_SSM_SECRET:
                return self.get_aws_ssm_secret()
            if kind == Secret.Type.GH_SECRET:
                return self.get_gh_secret()
            assert False, f"Not supported secret type, secret [{self}]"

        def _non_empty(self, value):
            # Shared check: an empty value means the lookup failed.
            if value:
                return value
            print(f"ERROR: Failed to get secret [{self.name}]")
            raise RuntimeError()

        def get_aws_ssm_var(self):
            """Read the value with `aws ssm get-parameter` (with decryption)."""
            value = Shell.get_output(
                f"aws ssm get-parameter --name {self.name} --with-decryption --output text --query Parameter.Value",
            )
            return self._non_empty(value)

        def get_aws_ssm_secret(self):
            """
            Read the value with `aws secretsmanager get-secret-value`.

            A name of the form "<secret id>.<key>" selects one key of the
            secret's JSON string via jq.
            """
            if "." in self.name:
                secret_id, json_key = self.name.split(".")
            else:
                secret_id, json_key = self.name, ""
            command = f"aws secretsmanager get-secret-value --secret-id {secret_id} --query SecretString --output text"
            if json_key:
                command += f" | jq -r '.[\"{json_key}\"]'"
            return self._non_empty(Shell.get_output(command, verbose=True))

        def get_gh_secret(self):
            """Read the value from the environment variable named `name`."""
            return self._non_empty(os.getenv(f"{self.name}"))

        def __repr__(self):
            return self.name
class ContextManager:
    @staticmethod
    @contextmanager
    def cd(to: Optional[Union[Path, str]] = None) -> Iterator[None]:
        """
        Temporarily change the current working directory.

        :param to: Target directory. When empty, the git repository root is
            used; if that cannot be determined, _Settings.DOCKER_WD is used
            when it is an existing directory.
        :return: Context manager; the previous working directory is restored
            on exit, also when the body raises.
        :raises AssertionError: if no target directory can be determined.
        """
        if not to:
            try:
                to = Shell.get_output_or_raise("git rev-parse --show-toplevel")
            except Exception:
                # Best effort: not being inside a git checkout is fine, the
                # fallbacks below apply. Unlike a bare `except:`, this lets
                # KeyboardInterrupt/SystemExit propagate.
                pass
            if not to:
                if Path(_Settings.DOCKER_WD).is_dir():
                    to = _Settings.DOCKER_WD
            if not to:
                assert False, "FIX IT"
        assert to
        old_pwd = os.getcwd()
        os.chdir(to)
        try:
            yield
        finally:
            os.chdir(old_pwd)
@classmethod
def run(
    cls,
    command,
    log_file=None,
    strict=False,
    verbose=False,
    dry_run=False,
    stdin_str=None,
    timeout=None,
    retries=0,
    **kwargs,
):
    """
    Run a shell command, streaming its combined stdout/stderr to this
    process's stdout and to a log file.

    :param command: Shell command line (executed with shell=True in a new
        session, so the whole process group can be signalled on timeout).
    :param log_file: File receiving the output; it is rewritten on every
        attempt. Defaults to /dev/null.
    :param strict: Raise AssertionError unless the command finally succeeded.
    :param verbose: Print the command and failure details.
    :param dry_run: Only print the command and return 0.
    :param stdin_str: Optional text written to the command's stdin.
    :param timeout: Seconds after which the process group gets SIGTERM, then
        SIGKILL if it is still alive about 100 seconds later.
    :param retries: Number of additional attempts after a failure.
    :param kwargs: Extra keyword arguments passed to subprocess.Popen.
    :return: Exit code of the last attempt, or 1 if no process was started.
    :raises AssertionError: in strict mode when the command did not succeed.
    """

    def _check_timeout(timeout, process) -> None:
        if not timeout:
            return
        time.sleep(timeout)
        if process.poll() is not None:
            # Finished within the timeout. Do not signal: the pid/pgid may
            # already belong to an unrelated process.
            return
        print(
            f"WARNING: Timeout exceeded [{timeout}], sending SIGTERM to process group [{process.pid}]"
        )
        try:
            os.killpg(process.pid, signal.SIGTERM)
        except ProcessLookupError:
            print("Process already terminated.")
            return

        time_wait = 0
        wait_interval = 5

        # Wait for process to terminate
        while process.poll() is None and time_wait < 100:
            print("Waiting for process to exit...")
            time.sleep(wait_interval)
            time_wait += wait_interval

        # Force kill if still running
        if process.poll() is None:
            print(f"WARNING: Process still running after SIGTERM, sending SIGKILL")
            try:
                os.killpg(process.pid, signal.SIGKILL)
            except ProcessLookupError:
                print("Process already terminated.")

    # Dry-run
    if dry_run:
        print(f"Dry-run. Would run command [{command}]")
        return 0  # Return success for dry-run

    if verbose:
        print(f"Run command: [{command}]")

    log_file = log_file or "/dev/null"
    proc = None
    for retry in range(retries + 1):
        try:
            with open(log_file, "w") as log_fp:
                proc = subprocess.Popen(
                    command,
                    shell=True,
                    stderr=subprocess.STDOUT,
                    stdout=subprocess.PIPE,
                    stdin=subprocess.PIPE if stdin_str else None,
                    universal_newlines=True,
                    start_new_session=True,  # Start a new process group for signal handling
                    bufsize=1,  # Line-buffered
                    errors="backslashreplace",
                    **kwargs,
                )

                # Start the timeout thread if specified
                if timeout:
                    t = Thread(target=_check_timeout, args=(timeout, proc))
                    t.daemon = True
                    t.start()

                # Write stdin if provided
                if stdin_str:
                    proc.stdin.write(stdin_str)
                    proc.stdin.close()

                # Process output in real-time
                if proc.stdout:
                    for line in proc.stdout:
                        sys.stdout.write(line)
                        log_fp.write(line)

                proc.wait()  # Wait for the process to finish

                if proc.returncode == 0:
                    break  # Exit retry loop if success
                else:
                    if verbose:
                        print(
                            f"ERROR: command [{command}] failed, exit code: {proc.returncode}, retry: {retry}/{retries}"
                        )
        except Exception as e:
            if verbose:
                print(
                    f"ERROR: command failed, exception: {e}, retry: {retry}/{retries}"
                )
            if proc:
                proc.kill()
                # Reap the child so returncode is set; otherwise None could
                # be returned to the caller.
                proc.wait()

    # Handle strict mode (ensure process success or fail)
    if strict and not (proc and proc.returncode == 0):
        # proc is None when no process could be started, so the return code
        # must not be read unconditionally while building the message.
        code = proc.returncode if proc else None
        raise AssertionError(f"Command failed with return code {code}")

    return proc.returncode if proc else 1  # Return 1 if process never started
class Utils:
    """
    Stateless helpers: process signalling, environment, time, string
    normalization and file discovery.
    """

    @staticmethod
    def terminate_process_group(pid, force=False):
        """Signal the process group of `pid`: SIGTERM by default, SIGKILL when `force`."""
        os.killpg(os.getpgid(pid), signal.SIGKILL if force else signal.SIGTERM)

    @staticmethod
    def set_env(key, val):
        """Set environment variable `key` to `val` for the current process."""
        os.environ[key] = val

    @staticmethod
    def print_formatted_error(error_message, stdout="", stderr=""):
        """Print `error_message`, then the non-empty `stdout` / `stderr` line by line."""
        print(f"ERROR: {error_message}")
        for title, text in ((" Out:", stdout), (" Err:", stderr)):
            lines = text.splitlines() if text else []
            if lines:
                print(title)
                for line in lines:
                    print(f" | {line}")

    @staticmethod
    def sleep(seconds):
        """Block the calling thread for `seconds`."""
        time.sleep(seconds)

    @staticmethod
    def cwd():
        """Return the current working directory as a Path."""
        return Path.cwd()

    @staticmethod
    def cpu_count():
        """Return the number of CPUs reported by multiprocessing."""
        return multiprocessing.cpu_count()

    @staticmethod
    def raise_with_error(error_message, stdout="", stderr=""):
        """
        Print the formatted error and raise.

        When called while an exception is being handled, that exception is
        re-raised; otherwise a RuntimeError carrying `error_message` is raised.

        :raises RuntimeError: when there is no exception being handled
        """
        Utils.print_formatted_error(error_message, stdout, stderr)
        if sys.exc_info()[1] is not None:
            raise
        # A bare `raise` without an active exception fails with
        # "RuntimeError: No active exception to reraise"; keep the exception
        # type but make it carry the actual message.
        raise RuntimeError(error_message)

    @staticmethod
    def timestamp():
        """Return the current POSIX timestamp (seconds since the epoch) as a float."""
        # datetime.utcnow() is naive, and .timestamp() interprets naive values as
        # local time, so the old form was shifted by the UTC offset on non-UTC hosts.
        return time.time()

    @staticmethod
    def timestamp_to_str(timestamp):
        """Format a POSIX timestamp as "YYYY-MM-DD HH:MM:SS" in UTC."""
        return datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def get_failed_tests_number(description: str) -> Optional[int]:
        """
        Extract the number following "fail:" (case-insensitive) from `description`.

        :return: the number, or None when there is no "fail: <N>" followed by a
            comma or the end of the string
        """
        match = re.search(r"fail:\s*(\d+)\s*(?=,|$)", description.lower())
        return int(match.group(1)) if match else None

    @staticmethod
    def is_killed_with_oom():
        """Return True when `sudo dmesg -T` output contains one of the OOM-killer markers."""
        return Shell.check(
            "sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
        )

    @staticmethod
    def clear_dmesg():
        """Run `sudo dmesg --clear`."""
        Shell.check("sudo dmesg --clear", verbose=True)

    @staticmethod
    def to_base64(value):
        """Return the base64 text of the UTF-8 encoding of the string `value`."""
        assert isinstance(value, str), f"TODO: not supported for {type(value)}"
        return base64.b64encode(value.encode("utf-8")).decode("utf-8")

    @staticmethod
    def is_hex(s):
        """Return True when `s` can be parsed by int(s, 16), False otherwise."""
        try:
            int(s, 16)
            return True
        except (TypeError, ValueError):
            # TypeError: `s` is not a string (e.g. None) - not a hex string either
            return False

    @staticmethod
    def normalize_string(string: str) -> str:
        """
        Lowercase `string`, turn spaces, "/" and "-" into "_" and drop
        brackets, braces, quotes, commas and colons.
        """
        # Every replacement is one character to one character or to nothing, and
        # "_" is never replaced itself, so a single translate() pass is equivalent
        # to the chain of str.replace() calls.
        table = str.maketrans(" /-", "___", "(){}'[],:\"")
        return string.lower().translate(table)

    @staticmethod
    def traverse_path(path, file_suffixes=None, sorted=False, not_exists_ok=False):
        """
        Collect files denoted by `path`.

        `path` may be a file, a directory (walked recursively) or a glob
        pattern containing "*" (expanded with recursive=True).

        :param file_suffixes: keep only files whose name ends with one of these
            suffixes; None keeps everything
        :param sorted: sort the result in reverse order
        :param not_exists_ok: return an empty list instead of failing when
            `path` is none of the above
        :raises AssertionError: when `path` does not exist and `not_exists_ok` is False
        """
        res = []

        def is_valid_file(file):
            if file_suffixes is None:
                return True
            # str() so that Path objects are accepted as well
            return any(str(file).endswith(suffix) for suffix in file_suffixes)

        if os.path.isfile(path):
            if is_valid_file(path):
                res.append(path)
        elif os.path.isdir(path):
            for root, _dirs, files in os.walk(path):
                for file in files:
                    full_path = os.path.join(root, file)
                    if is_valid_file(full_path):
                        res.append(full_path)
        elif "*" in str(path):
            res.extend(
                f
                for f in glob.glob(path, recursive=True)
                if os.path.isfile(f) and is_valid_file(f)
            )
        elif not not_exists_ok:
            raise AssertionError(f"File does not exist or not valid [{path}]")

        if sorted:
            res.sort(reverse=True)

        return res

    @classmethod
    def traverse_paths(
        cls,
        include_paths,
        exclude_paths,
        file_suffixes=None,
        sorted=False,
        not_exists_ok=False,
    ) -> List[str]:
        """
        Collect files under `include_paths` minus the files under `exclude_paths`.

        `file_suffixes` filters the included files only. `not_exists_ok` is
        applied to `exclude_paths` only: every include path must exist.
        A warning is printed for an exclude path that matches no files.

        :param sorted: sort the result in reverse order; otherwise the order is
            that of set iteration
        """
        included_files_ = set()
        for path in include_paths:
            included_files_.update(cls.traverse_path(path, file_suffixes=file_suffixes))

        excluded_files = set()
        for path in exclude_paths:
            found = cls.traverse_path(path, not_exists_ok=not_exists_ok)
            if not found:
                print(
                    f"WARNING: Utils.traverse_paths excluded 0 files by path [{path}] in exclude_paths"
                )
            else:
                excluded_files.update(found)

        res = [f for f in included_files_ if f not in excluded_files]
        if sorted:
            res.sort(reverse=True)
        return res

    @classmethod
    def add_to_PATH(cls, path):
        """Prepend `path` to the PATH environment variable of the current process."""
        path_cur = os.getenv("PATH", "")
        if path_cur:
            path += ":" + path_cur
        os.environ["PATH"] = path

    class Stopwatch:
        """Measures wall-clock time elapsed since the object was created."""

        def __init__(self):
            # POSIX timestamp of creation, same clock as Utils.timestamp()
            self.start_time = time.time()

        @property
        def duration(self) -> float:
            """Seconds elapsed since creation."""
            return time.time() - self.start_time
class Validator:
    """
    Consistency checks of workflow configs against settings.

    Most checks are plain `assert` statements (AssertionError on failure);
    checks routed through evaluate_check() print the problem and exit with code 1.
    """

    @classmethod
    def validate(cls):
        """Validate every workflow returned by _get_workflows()."""
        print("---Start validating Pipeline and settings---")
        workflows = _get_workflows()
        for workflow in workflows:
            print(f"Validating workflow [{workflow.name}]")

            cls.validate_file_paths_in_run_command(workflow)
            cls.validate_file_paths_in_digest_configs(workflow)
            cls.validate_requirements_txt_files(workflow)
            cls.validate_dockers(workflow)

            if workflow.artifacts:
                for artifact in workflow.artifacts:
                    if artifact.is_s3_artifact():
                        assert (
                            Settings.S3_ARTIFACT_PATH
                        ), "Provide S3_ARTIFACT_PATH setting in any .py file in ./ci/settings/* to be able to use s3 for artifacts"

            for job in workflow.jobs:
                if job.requires and workflow.artifacts:
                    for require in job.requires:
                        # NOTE(review): workflow.artifacts is iterated as a sequence of
                        # artifact objects above, but here it is tested for membership
                        # of `require` and indexed by it. If `require` is an artifact
                        # name this condition looks like it can never be true (and the
                        # indexing would fail if it were) - confirm intended lookup.
                        if (
                            require in workflow.artifacts
                            and workflow.artifacts[require].is_s3_artifact()
                        ):
                            assert not any(
                                [r in GHRunners for r in job.runs_on]
                            ), f"GH runners [{job.name}:{job.runs_on}] must not be used with S3 as artifact storage"

                if job.allow_merge_on_failure:
                    assert (
                        workflow.enable_merge_ready_status
                    ), f"Job property allow_merge_on_failure must be used only with enabled workflow.enable_merge_ready_status, workflow [{workflow.name}], job [{job.name}]"

            if workflow.enable_cache:
                assert (
                    Settings.CI_CONFIG_RUNS_ON
                ), f"Runner label to run workflow config job must be provided via CI_CONFIG_RUNS_ON setting if enable_cache=True, workflow [{workflow.name}]"

                assert (
                    Settings.CACHE_S3_PATH
                ), f"CACHE_S3_PATH Setting must be defined if enable_cache=True, workflow [{workflow.name}]"

            if workflow.dockers:
                cls.evaluate_check(
                    Settings.DOCKER_BUILD_RUNS_ON,
                    "DOCKER_BUILD_RUNS_ON settings must be defined if workflow has dockers",
                    workflow_name=workflow.name,
                )

            if workflow.enable_report:
                assert (
                    Settings.HTML_S3_PATH
                ), f"HTML_S3_PATH Setting must be defined if enable_report=True, workflow [{workflow.name}]"
                assert (
                    Settings.S3_BUCKET_TO_HTTP_ENDPOINT
                ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must be defined if enable_report=True, workflow [{workflow.name}]"
                # the first path component of HTML_S3_PATH is looked up as the bucket name
                assert (
                    Settings.HTML_S3_PATH.split("/")[0]
                    in Settings.S3_BUCKET_TO_HTTP_ENDPOINT
                ), f"S3_BUCKET_TO_HTTP_ENDPOINT Setting must include bucket name [{Settings.HTML_S3_PATH}] from HTML_S3_PATH, workflow [{workflow.name}]"

            if workflow.enable_cache:
                for artifact in workflow.artifacts or []:
                    assert (
                        artifact.is_s3_artifact()
                    ), f"All artifacts must be of S3 type if enable_cache=True, artifact [{artifact.name}], type [{artifact.type}], workflow [{workflow.name}]"

            if workflow.dockers:
                assert (
                    Settings.DOCKERHUB_USERNAME
                ), f"Settings.DOCKERHUB_USERNAME must be provided if workflow has dockers, workflow [{workflow.name}]"
                assert (
                    Settings.DOCKERHUB_SECRET
                ), f"Settings.DOCKERHUB_SECRET must be provided if workflow has dockers, workflow [{workflow.name}]"
                assert workflow.get_secret(
                    Settings.DOCKERHUB_SECRET
                ), f"Secret [{Settings.DOCKERHUB_SECRET}] must have configuration in workflow.secrets, workflow [{workflow.name}]"

            if (
                workflow.enable_cache
                or workflow.enable_report
                or workflow.enable_merge_ready_status
            ):
                for job in workflow.jobs:
                    assert not any(
                        runner in ("ubuntu-latest",) for runner in job.runs_on
                    ), f"GitHub Runners must not be used for workflow with enabled: workflow.enable_cache, workflow.enable_report or workflow.enable_merge_ready_status as s3 access is required, workflow [{workflow.name}], job [{job.name}]"

            if workflow.enable_cidb:
                assert (
                    Settings.SECRET_CI_DB_URL
                ), f"Settings.SECRET_CI_DB_URL must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
                assert (
                    Settings.SECRET_CI_DB_PASSWORD
                ), f"Settings.SECRET_CI_DB_PASSWORD must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
                assert (
                    Settings.CI_DB_DB_NAME
                ), f"Settings.CI_DB_DB_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"
                assert (
                    Settings.CI_DB_TABLE_NAME
                ), f"Settings.CI_DB_TABLE_NAME must be provided if workflow.enable_cidb=True, workflow [{workflow.name}]"

    @classmethod
    def validate_file_paths_in_run_command(cls, workflow: Workflow.Config) -> None:
        """
        Assert that every space-separated part of a job command that contains
        "/" is an existing file or directory.

        Checking of a command stops at the first part containing ">".
        Skipped entirely when Settings.VALIDATE_FILE_PATHS is falsy.
        """
        if not Settings.VALIDATE_FILE_PATHS:
            return
        # NOTE(review): ContextManager.cd() presumably switches to the repository root - confirm
        with ContextManager.cd():
            for job in workflow.jobs:
                run_command = job.command
                for part in run_command.split(" "):
                    if ">" in part:
                        # Stop checking this command only; a `return` here would
                        # silently skip validation of all remaining jobs.
                        break
                    if "/" in part:
                        assert (
                            Path(part).is_file() or Path(part).is_dir()
                        ), f"Apparently run command [{run_command}] for job [{job.name}] has invalid path [{part}]. Setting to disable check: VALIDATE_FILE_PATHS"

    @classmethod
    def validate_file_paths_in_digest_configs(cls, workflow: Workflow.Config) -> None:
        """
        Assert that include/exclude paths of every job digest config exist.

        A path containing "*" must match at least one glob result; any other
        path must be an existing file or directory. Skipped entirely when
        Settings.VALIDATE_FILE_PATHS is falsy.
        """
        if not Settings.VALIDATE_FILE_PATHS:
            return
        with ContextManager.cd():
            for job in workflow.jobs:
                if not job.digest_config:
                    continue
                for digest_path in chain(
                    job.digest_config.include_paths, job.digest_config.exclude_paths
                ):
                    if "*" in digest_path:
                        assert glob.glob(
                            digest_path, recursive=True
                        ), f"Apparently file glob [{digest_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"
                    else:
                        assert (
                            Path(digest_path).is_file() or Path(digest_path).is_dir()
                        ), f"Apparently file path [{digest_path}] in job [{job.name}] digest_config [{job.digest_config}] invalid, workflow [{workflow.name}]. Setting to disable check: VALIDATE_FILE_PATHS"

    @classmethod
    def validate_requirements_txt_files(cls, workflow: Workflow.Config) -> None:
        """
        Check that the python requirements file of every job exists.

        For the docker-build, config and finish jobs (names taken from
        Settings) the failure message is extended with setup hints.
        """
        with ContextManager.cd():
            for job in workflow.jobs:
                if not job.job_requirements:
                    continue
                if not job.job_requirements.python_requirements_txt:
                    continue
                path = Path(job.job_requirements.python_requirements_txt)
                message = f"File with py requirement [{path}] does not exist"
                if job.name in (
                    Settings.DOCKER_BUILD_JOB_NAME,
                    Settings.CI_CONFIG_JOB_NAME,
                    Settings.FINISH_WORKFLOW_JOB_NAME,
                ):
                    message += '\n If all requirements already installed on your runners - add setting INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS""'
                    message += "\n If requirements needs to be installed - add requirements file (Settings.INSTALL_PYTHON_REQS_FOR_NATIVE_JOBS):"
                    message += "\n echo jwt==1.3.1 > ./ci/requirements.txt"
                    message += "\n echo requests==2.32.3 >> ./ci/requirements.txt"
                    message += "\n echo https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl >> ./ci/requirements.txt"
                # Keywords on purpose: evaluate_check() takes workflow_name before
                # job_name, passing them positionally in the other order swapped
                # the two names in the report.
                cls.evaluate_check(
                    path.is_file(),
                    message,
                    workflow_name=workflow.name,
                    job_name=job.name,
                )

    @classmethod
    def validate_dockers(cls, workflow: Workflow.Config):
        """Check that docker names are unique and every dependency names a docker of the workflow."""
        names = []
        for docker in workflow.dockers:
            cls.evaluate_check(
                docker.name not in names,
                f"Non uniq docker name [{docker.name}]",
                workflow_name=workflow.name,
            )
            names.append(docker.name)
        for docker in workflow.dockers:
            for docker_dep in docker.depends_on:
                cls.evaluate_check(
                    docker_dep in names,
                    f"Docker [{docker.name}] has invalid dependency [{docker_dep}]",
                    workflow_name=workflow.name,
                )

    @classmethod
    def evaluate_check(cls, check_ok, message, workflow_name, job_name=""):
        """
        Report a failed check and terminate the program.

        :param check_ok: result of the check; nothing happens when truthy
        :param message: text (may contain newlines) or a list of lines
        :param workflow_name: workflow the check belongs to
        :param job_name: job the check belongs to, if any
        :raises SystemExit: exit code 1 when `check_ok` is falsy
        """
        if check_ok:
            return
        # Split only strings: an unconditional message.split() made the list
        # form of `message` unusable.
        messages = message.split("\n") if isinstance(message, str) else list(message)
        print(
            f"ERROR: Config validation failed: workflow [{workflow_name}], job [{job_name}]:"
        )
        for line in messages:
            print(" || " + line)
        sys.exit(1)
self.secrets: + if secret.name == name: + return secret + names.append(secret.name) + print(f"ERROR: Failed to find secret [{name}], workflow secrets [{names}]") + raise diff --git a/ci/praktika/yaml_generator.py b/ci/praktika/yaml_generator.py new file mode 100644 index 00000000000..9c61b5e2f79 --- /dev/null +++ b/ci/praktika/yaml_generator.py @@ -0,0 +1,349 @@ +import dataclasses +from typing import List + +from praktika import Artifact, Job, Workflow +from praktika.mangle import _get_workflows +from praktika.parser import WorkflowConfigParser +from praktika.runtime import RunConfig +from praktika.settings import Settings +from praktika.utils import ContextManager, Shell, Utils + + +class YamlGenerator: + class Templates: + TEMPLATE_PULL_REQUEST_0 = """\ +# generated by praktika + +name: {NAME} + +on: + {EVENT}: + branches: [{BRANCHES}] + +# Cancel the previous wf run in PRs. +concurrency: + group: ${{{{{{{{ github.workflow }}}}}}}}-${{{{{{{{ github.ref }}}}}}}} + cancel-in-progress: true + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + GH_TOKEN: ${{{{{{{{ github.token }}}}}}}} + +# Allow updating GH commit statuses and PR comments to post an actual job reports link +permissions: write-all + +jobs: +{JOBS}\ +""" + + TEMPLATE_CALLABLE_WORKFLOW = """\ +# generated by praktika + +name: {NAME} +on: + workflow_call: + inputs: + config: + type: string + required: false + default: '' + secrets: +{SECRETS} + +env: + PYTHONUNBUFFERED: 1 + +jobs: +{JOBS}\ +""" + + TEMPLATE_SECRET_CONFIG = """\ + {SECRET_NAME}: + required: true +""" + + TEMPLATE_MATRIX = """ + strategy: + fail-fast: false + matrix: + params: {PARAMS_LIST}\ +""" + + TEMPLATE_JOB_0 = """ + {JOB_NAME_NORMALIZED}: + runs-on: [{RUNS_ON}] + needs: [{NEEDS}]{IF_EXPRESSION} + name: "{JOB_NAME_GH}" + outputs: + data: ${{{{ steps.run.outputs.DATA }}}} + steps: + - name: Checkout code + uses: actions/checkout@v4 +{JOB_ADDONS} + - name: Prepare env script + run: | + export 
PYTHONPATH=.:$PYTHONPATH + cat > {ENV_SETUP_SCRIPT} << 'ENV_SETUP_SCRIPT_EOF' +{SETUP_ENVS} + cat > {WORKFLOW_CONFIG_FILE} << 'EOF' + ${{{{ needs.{WORKFLOW_CONFIG_JOB_NAME}.outputs.data }}}} + EOF + cat > {WORKFLOW_STATUS_FILE} << 'EOF' + ${{{{ toJson(needs) }}}} + EOF + ENV_SETUP_SCRIPT_EOF + + rm -rf {INPUT_DIR} {OUTPUT_DIR} {TEMP_DIR} + mkdir -p {TEMP_DIR} {INPUT_DIR} {OUTPUT_DIR} +{DOWNLOADS_GITHUB} + - name: Run + id: run + run: | + set -o pipefail + {PYTHON} -m praktika run --job '''{JOB_NAME}''' --workflow "{WORKFLOW_NAME}" --ci |& tee {RUN_LOG} +{UPLOADS_GITHUB}\ +""" + + TEMPLATE_SETUP_ENV_SECRETS = """\ + export {SECRET_NAME}=$(cat<<'EOF' + ${{{{ secrets.{SECRET_NAME} }}}} + EOF + )\ +""" + + TEMPLATE_PY_INSTALL = """ + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: {PYTHON_VERSION} +""" + + TEMPLATE_PY_WITH_REQUIREMENTS = """ + - name: Install dependencies + run: | + sudo apt-get update && sudo apt install -y python3-pip + # TODO: --break-system-packages? 
def generate(self, workflow_file="", workflow_config=None):
    """
    Generate GitHub Actions yaml files for workflows and stage them with `git add`.

    :param workflow_file: passed to _get_workflows(file=...) when no
        `workflow_config` is given
    :param workflow_config: a workflow config, or a list/tuple of them, to
        generate instead of the configs returned by _get_workflows()
    :raises AssertionError: when no workflow is found or a workflow event is
        neither pull_request nor push
    """
    print("---Start generating yaml pipelines---")
    if workflow_config:
        # Callers also pass a list of configs (see the __main__ block of this
        # module); wrapping a list once more would yield a nested list.
        if isinstance(workflow_config, (list, tuple)):
            self.py_workflows = list(workflow_config)
        else:
            self.py_workflows = [workflow_config]
    else:
        self.py_workflows = _get_workflows(file=workflow_file)
        assert self.py_workflows
    for config in self.py_workflows:
        print(f"Generate workflow [{config.name}]")
        parser = WorkflowConfigParser(config).parse()
        if not (config.is_event_pull_request() or config.is_event_push()):
            raise AssertionError(
                f"Workflow event not yet supported [{config.event}]"
            )
        yaml_workflow_str = PullRequestPushYamlGen(parser).generate()

        with ContextManager.cd():
            with open(self._get_workflow_file_name(config.name), "w") as f:
                f.write(yaml_workflow_str)

    with ContextManager.cd():
        Shell.check("git add ./.github/workflows/*.yaml")
def generate(self):
    """
    Render `self.workflow_config` into the text of a workflow yaml file.

    Each job is rendered with TEMPLATE_JOB_0; the rendered jobs are then
    inserted, in order, into TEMPLATE_PULL_REQUEST_0.

    :return: the yaml document as a string
    """
    templates = YamlGenerator.Templates
    workflow = self.workflow_config
    rendered_jobs = []

    for job in workflow.jobs:
        job_name = job.name
        job_id = Utils.normalize_string(job_name)
        needs = ", ".join(Utils.normalize_string(dep) for dep in job.needs)

        # Optional steps: python setup and requirements installation
        addon_steps = []
        for addon in job.addons:
            if addon.install_python:
                addon_steps.append(
                    templates.TEMPLATE_PY_INSTALL.format(
                        PYTHON_VERSION=Settings.PYTHON_VERSION
                    )
                )
            if addon.requirements_txt_path:
                addon_steps.append(
                    templates.TEMPLATE_PY_WITH_REQUIREMENTS.format(
                        PYTHON=Settings.PYTHON_INTERPRETER,
                        PIP=Settings.PYTHON_PACKET_MANAGER,
                        PYTHON_VERSION=Settings.PYTHON_VERSION,
                        REQUIREMENT_PATH=addon.requirements_txt_path,
                    )
                )

        # Upload / download steps for artifacts kept in GitHub storage
        upload_steps = [
            templates.TEMPLATE_GH_UPLOAD.format(NAME=artifact.name, PATH=artifact.path)
            for artifact in job.artifacts_gh_provides
        ]
        download_steps = [
            templates.TEMPLATE_GH_DOWNLOAD.format(
                NAME=artifact.name, PATH=Settings.INPUT_DIR
            )
            for artifact in job.artifacts_gh_requires
        ]

        config_job_id = Utils.normalize_string(Settings.CI_CONFIG_JOB_NAME)

        # `if:` expression of the job; run_unless_cancelled takes precedence
        # over the cache-based expression
        cache_expression = ""
        if workflow.enable_cache and job_id != config_job_id:
            cache_expression = templates.TEMPLATE_IF_EXPRESSION.format(
                WORKFLOW_CONFIG_JOB_NAME=config_job_id,
                JOB_NAME_BASE64=Utils.to_base64(job_name),
            )
        if_expression = (
            templates.TEMPLATE_IF_EXPRESSION_NOT_CANCELLED
            if job.run_unless_cancelled
            else cache_expression
        )

        # One env export snippet per GitHub secret of the workflow
        secret_exports = [
            templates.TEMPLATE_SETUP_ENV_SECRETS.format(SECRET_NAME=secret)
            for secret in workflow.secret_names_gh
        ]

        rendered_jobs.append(
            templates.TEMPLATE_JOB_0.format(
                JOB_NAME_NORMALIZED=job_id,
                WORKFLOW_CONFIG_JOB_NAME=config_job_id,
                IF_EXPRESSION=if_expression,
                RUNS_ON=", ".join(job.runs_on),
                NEEDS=needs,
                JOB_NAME_GH=job_name.replace('"', '\\"'),
                # ' must be escaped so that yaml commands are properly parsed
                JOB_NAME=job_name.replace("'", "'\\''"),
                WORKFLOW_NAME=workflow.name,
                ENV_SETUP_SCRIPT=Settings.ENV_SETUP_SCRIPT,
                SETUP_ENVS="\n".join(secret_exports),
                WORKFLOW_CONFIG_FILE=RunConfig.file_name_static(workflow.name),
                JOB_ADDONS="".join(addon_steps),
                DOWNLOADS_GITHUB="\n".join(download_steps),
                UPLOADS_GITHUB="\n".join(upload_steps),
                RUN_LOG=Settings.RUN_LOG,
                PYTHON=Settings.PYTHON_INTERPRETER,
                WORKFLOW_STATUS_FILE=Settings.WORKFLOW_STATUS_FILE,
                TEMP_DIR=Settings.TEMP_DIR,
                INPUT_DIR=Settings.INPUT_DIR,
                OUTPUT_DIR=Settings.OUTPUT_DIR,
            )
        )

    # Two-pass formatting: the first pass fills the workflow-level fields and
    # leaves one "{}" placeholder per job, the second pass inserts the jobs.
    quoted_branches = ", ".join([f"'{branch}'" for branch in workflow.branches])
    skeleton = templates.TEMPLATE_PULL_REQUEST_0.strip().format(
        NAME=workflow.name,
        BRANCHES=quoted_branches,
        EVENT=workflow.event,
        JOBS="{}" * len(rendered_jobs),
    )
    return skeleton.format(*rendered_jobs)
if __name__ == "__main__":
    # Manual smoke run: generate yaml for a sample pull-request workflow
    WFS = [
        Workflow.Config(
            name="PR",
            event=Workflow.Event.PULL_REQUEST,
            jobs=[
                Job.Config(
                    name="Hello World",
                    runs_on=["foo"],
                    command="bar",
                    job_requirements=Job.Requirements(
                        python_requirements_txt="./requirement.txt"
                    ),
                )
            ],
            enable_cache=True,
        )
    ]
    # `workflow_config` stands for a single workflow config, so hand the
    # configs over one by one instead of passing the whole list.
    generator = YamlGenerator()
    for wf in WFS:
        generator.generate(workflow_config=wf)
path="./docker/test/util", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/util", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), # Docker.Config( # name="clickhouse/integration-test", - # path="./docker/test/integration/base", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/integration/base", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/fuzzer", - # path="./docker/test/fuzzer", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/fuzzer", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/performance-comparison", - # path="./docker/test/performance-comparison", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/performance-comparison", + # platforms=Docker.Platforms.arm_amd, # depends_on=[], # ), - # Docker.Config( - # name="clickhouse/fasttest", - # path="./docker/test/fasttest", - # arm64=True, - # amd64=True, - # depends_on=["clickhouse/test-util"], - # ), + Docker.Config( + name="clickhouse/fasttest", + path="./ci_v2/docker/fasttest", + platforms=Docker.Platforms.arm_amd, + depends_on=[], + ), # Docker.Config( # name="clickhouse/test-base", - # path="./docker/test/base", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/base", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-util"], # ), # Docker.Config( # name="clickhouse/clickbench", - # path="./docker/test/clickbench", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/clickbench", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/keeper-jepsen-test", - # path="./docker/test/keeper-jepsen", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/keeper-jepsen", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # 
name="clickhouse/server-jepsen-test", - # path="./docker/test/server-jepsen", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/server-jepsen", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/sqllogic-test", - # path="./docker/test/sqllogic", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/sqllogic", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/sqltest", - # path="./docker/test/sqltest", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/sqltest", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/stateless-test", - # path="./docker/test/stateless", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/stateless", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/stateful-test", - # path="./docker/test/stateful", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/stateful", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/stateless-test"], # ), # Docker.Config( # name="clickhouse/stress-test", - # path="./docker/test/stress", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/stress", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/stateful-test"], # ), # Docker.Config( # name="clickhouse/unit-test", - # path="./docker/test/unit", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/unit", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), # Docker.Config( # name="clickhouse/integration-tests-runner", - # path="./docker/test/integration/runner", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/test/integration/runner", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), Docker.Config( @@ -175,9 +156,8 @@ 
DOCKERS = [ ), # Docker.Config( # name="clickhouse/docs-builder", - # path="./docker/docs/builder", - # arm64=True, - # amd64=True, + # path="./ci_v2/docker/docs/builder", + # platforms=Docker.Platforms.arm_amd, # depends_on=["clickhouse/test-base"], # ), ] @@ -249,3 +229,4 @@ DOCKERS = [ class JobNames: STYLE_CHECK = "Style Check" + FAST_TEST = "Fast test" diff --git a/ci_v2/settings/settings.py b/ci/settings/settings.py similarity index 100% rename from ci_v2/settings/settings.py rename to ci/settings/settings.py diff --git a/ci_v2/workflows/pull_request.py b/ci/workflows/pull_request.py similarity index 70% rename from ci_v2/workflows/pull_request.py rename to ci/workflows/pull_request.py index 226455c77f2..0e96329788b 100644 --- a/ci_v2/workflows/pull_request.py +++ b/ci/workflows/pull_request.py @@ -16,12 +16,20 @@ style_check_job = Job.Config( run_in_docker="clickhouse/style-test", ) +fast_test_job = Job.Config( + name=JobNames.FAST_TEST, + runs_on=[RunnerLabels.BUILDER], + command="python3 ./ci_v2/jobs/fast_test.py", + run_in_docker="clickhouse/fasttest", +) + workflow = Workflow.Config( name="PR", event=Workflow.Event.PULL_REQUEST, base_branches=[BASE_BRANCH], jobs=[ style_check_job, + fast_test_job, ], dockers=DOCKERS, secrets=SECRETS, @@ -36,9 +44,7 @@ WORKFLOWS = [ if __name__ == "__main__": - # example: local job test inside praktika environment + # local job test inside praktika environment from praktika.runner import Runner - Runner.generate_dummy_environment(workflow, style_check_job) - - Runner().run(workflow, style_check_job) + Runner().run(workflow, fast_test_job, docker="fasttest", dummy_env=True) diff --git a/ci_v2/docker/style-test/requirements.txt b/ci_v2/docker/style-test/requirements.txt deleted file mode 100644 index 987b014d9ba..00000000000 --- a/ci_v2/docker/style-test/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -requests==2.32.3 -yamllint==1.26.3 -codespell==2.2.1 
-https://clickhouse-builds.s3.amazonaws.com/packages/praktika-0.1-py3-none-any.whl diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b102b2919d9..fa0f95245f2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -178,35 +178,13 @@ add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) -if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42) - option (ENABLE_QPL "Enable Intel® Query Processing Library (QPL)" ${ENABLE_LIBRARIES}) -elseif(ENABLE_QPL) - message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 with SSE 4.2 or higher") -endif() -if (ENABLE_QPL) - add_contrib (idxd-config-cmake idxd-config) - add_contrib (qpl-cmake qpl) # requires: idxd-config -else() - message(STATUS "Not using QPL") -endif () - if (OS_LINUX AND ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_QATLIB "Enable Intel® QuickAssist Technology Library (QATlib)" ${ENABLE_LIBRARIES}) elseif(ENABLE_QATLIB) message (${RECONFIGURE_MESSAGE_LEVEL} "QATLib is only supported on x86_64") endif() if (ENABLE_QATLIB) - option (ENABLE_QAT_USDM_DRIVER "A User Space DMA-able Memory (USDM) component which allocates/frees DMA-able memory" OFF) - option (ENABLE_QAT_OUT_OF_TREE_BUILD "Using out-of-tree driver, user needs to customize ICP_ROOT variable" OFF) - set(ICP_ROOT "" CACHE STRING "ICP_ROOT variable to define the path of out-of-tree driver package") - if (ENABLE_QAT_OUT_OF_TREE_BUILD) - if (ICP_ROOT STREQUAL "") - message(FATAL_ERROR "Please define the path of out-of-tree driver package with -DICP_ROOT=xxx or disable out-of-tree build with -DENABLE_QAT_OUT_OF_TREE_BUILD=OFF; \ - If you want out-of-tree build but have no package available, please download and build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html") - endif () - else() - add_contrib (qatlib-cmake qatlib) # requires: isa-l - endif () + add_contrib (qatlib-cmake qatlib) # requires: isa-l add_contrib 
(QAT-ZSTD-Plugin-cmake QAT-ZSTD-Plugin) else() message(STATUS "Not using QATLib") diff --git a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt index fc18092f574..5d1cfa2af14 100644 --- a/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt +++ b/contrib/QAT-ZSTD-Plugin-cmake/CMakeLists.txt @@ -1,85 +1,53 @@ # Intel® QuickAssist Technology ZSTD Plugin (QAT ZSTD Plugin) is a plugin to Zstandard*(ZSTD*) for accelerating compression by QAT. -# ENABLE_QAT_OUT_OF_TREE_BUILD = 1 means kernel don't have native support, user will build and install driver from external package: https://www.intel.com/content/www/us/en/download/765501.html -# meanwhile, user need to set ICP_ROOT environment variable which point to the root directory of QAT driver source tree. -# ENABLE_QAT_OUT_OF_TREE_BUILD = 0 means kernel has built-in qat driver, QAT-ZSTD-PLUGIN just has dependency on qatlib. -if (ENABLE_QAT_OUT_OF_TREE_BUILD) - message(STATUS "Intel QATZSTD out-of-tree build, ICP_ROOT:${ICP_ROOT}") +message(STATUS "Intel QATZSTD in-tree build") +set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") +set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") +set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") - set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") - set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") - set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") - set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") - set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") - set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") - set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") - set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") - set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") - if (ENABLE_QAT_USDM_DRIVER) - add_definitions(-DENABLE_USDM_DRV) - endif() - add_library(_qatzstd_plugin ${QATZSTD_SRC}) - 
target_link_libraries (_qatzstd_plugin PUBLIC ${USDM_LIBRARY} ${QAT_S_LIBRARY}) - target_include_directories(_qatzstd_plugin - SYSTEM PUBLIC "${QATZSTD_SRC_DIR}" - PRIVATE ${QAT_INCLUDE_DIR} - ${QAT_DC_INCLUDE_DIR} - ${QAT_AL_INCLUDE_DIR} - ${QAT_USDM_INCLUDE_DIR} - ${ZSTD_LIBRARY_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0) - add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) -else () # In-tree build - message(STATUS "Intel QATZSTD in-tree build") - set(QATZSTD_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/QAT-ZSTD-Plugin/src") - set(QATZSTD_SRC "${QATZSTD_SRC_DIR}/qatseqprod.c") - set(ZSTD_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib") +# please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html +set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") +set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") +set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") +set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") +set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") +set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") +set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") +set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") - # please download&build ICP package from: https://www.intel.com/content/www/us/en/download/765501.html - set(ICP_ROOT "${ClickHouse_SOURCE_DIR}/contrib/qatlib") - set(QAT_INCLUDE_DIR "${ICP_ROOT}/quickassist/include") - set(QAT_DC_INCLUDE_DIR "${ICP_ROOT}/quickassist/include/dc") - set(QAT_AL_INCLUDE_DIR "${ICP_ROOT}/quickassist/lookaside/access_layer/include") - set(QAT_USDM_INCLUDE_DIR "${ICP_ROOT}/quickassist/utilities/libusdm_drv") - set(USDM_LIBRARY "${ICP_ROOT}/build/libusdm_drv_s.so") - set(QAT_S_LIBRARY "${ICP_ROOT}/build/libqat_s.so") - set(LIBQAT_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qatlib") - 
set(LIBQAT_HEADER_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") +file(MAKE_DIRECTORY + "${LIBQAT_HEADER_DIR}/qat" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) +file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" + DESTINATION "${LIBQAT_HEADER_DIR}/qat/" +) - file(MAKE_DIRECTORY - "${LIBQAT_HEADER_DIR}/qat" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/cpa.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/include/dc/cpa_dc.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_poll.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/lookaside/access_layer/include/icp_sal_user.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - file(COPY "${LIBQAT_ROOT_DIR}/quickassist/utilities/libusdm_drv/qae_mem.h" - DESTINATION "${LIBQAT_HEADER_DIR}/qat/" - ) - - if (ENABLE_QAT_USDM_DRIVER) - add_definitions(-DENABLE_USDM_DRV) - endif() - - add_library(_qatzstd_plugin ${QATZSTD_SRC}) - target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) - target_include_directories(_qatzstd_plugin PRIVATE - ${QAT_INCLUDE_DIR} - ${QAT_DC_INCLUDE_DIR} - ${QAT_AL_INCLUDE_DIR} - ${QAT_USDM_INCLUDE_DIR} - ${ZSTD_LIBRARY_DIR} - ${LIBQAT_HEADER_DIR}) - target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) - target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) - add_library (ch_contrib::qatzstd_plugin ALIAS 
_qatzstd_plugin) -endif () +if (ENABLE_QAT_USDM_DRIVER) + add_definitions(-DENABLE_USDM_DRV) +endif() +add_library(_qatzstd_plugin ${QATZSTD_SRC}) +target_link_libraries (_qatzstd_plugin PUBLIC ch_contrib::qatlib ch_contrib::usdm) +target_include_directories(_qatzstd_plugin PRIVATE + ${QAT_INCLUDE_DIR} + ${QAT_DC_INCLUDE_DIR} + ${QAT_AL_INCLUDE_DIR} + ${QAT_USDM_INCLUDE_DIR} + ${ZSTD_LIBRARY_DIR} + ${LIBQAT_HEADER_DIR}) +target_compile_definitions(_qatzstd_plugin PRIVATE -DDEBUGLEVEL=0 PUBLIC -DINTREE) +target_include_directories(_qatzstd_plugin SYSTEM PUBLIC $ $) +add_library (ch_contrib::qatzstd_plugin ALIAS _qatzstd_plugin) diff --git a/contrib/idxd-config b/contrib/idxd-config deleted file mode 160000 index a836ce0e420..00000000000 --- a/contrib/idxd-config +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1 diff --git a/contrib/idxd-config-cmake/CMakeLists.txt b/contrib/idxd-config-cmake/CMakeLists.txt deleted file mode 100644 index 030252ec8e6..00000000000 --- a/contrib/idxd-config-cmake/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -## accel_config is the utility library required by QPL-Deflate codec for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA). 
-set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config") -set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config-cmake/include") -set (SRCS - "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c" - "${LIBACCEL_SOURCE_DIR}/util/log.c" - "${LIBACCEL_SOURCE_DIR}/util/sysfs.c" -) - -add_library(_accel-config ${SRCS}) - -target_compile_options(_accel-config PRIVATE "-D_GNU_SOURCE") - -target_include_directories(_accel-config BEFORE - PRIVATE ${UUID_DIR} - PRIVATE ${LIBACCEL_HEADER_DIR} - PRIVATE ${LIBACCEL_SOURCE_DIR}) - -target_include_directories(_accel-config SYSTEM BEFORE - PUBLIC ${LIBACCEL_SOURCE_DIR}/accfg) - -add_library(ch_contrib::accel-config ALIAS _accel-config) diff --git a/contrib/idxd-config-cmake/include/config.h b/contrib/idxd-config-cmake/include/config.h deleted file mode 100644 index f03b0eac0b0..00000000000 --- a/contrib/idxd-config-cmake/include/config.h +++ /dev/null @@ -1,159 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Define if building universal (internal helper macro) */ -/* #undef AC_APPLE_UNIVERSAL_BUILD */ - -/* Debug messages. */ -/* #undef ENABLE_DEBUG */ - -/* Documentation / man pages. */ -/* #define ENABLE_DOCS */ - -/* System logging. */ -#define ENABLE_LOGGING 1 - -/* accfg test support */ -/* #undef ENABLE_TEST */ - -/* Define to 1 if big-endian-arch */ -/* #undef HAVE_BIG_ENDIAN */ - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_LINUX_VERSION_H 1 - -/* Define to 1 if little-endian-arch */ -#define HAVE_LITTLE_ENDIAN 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `secure_getenv' function. 
*/ -#define HAVE_SECURE_GETENV 1 - -/* Define to 1 if you have statement expressions. */ -#define HAVE_STATEMENT_EXPR 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if typeof works with your compiler. */ -#define HAVE_TYPEOF 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if using libuuid */ -#define HAVE_UUID 1 - -/* Define to 1 if you have the `__secure_getenv' function. */ -/* #undef HAVE___SECURE_GETENV */ - -/* Define to the sub-directory where libtool stores uninstalled libraries. */ -#define LT_OBJDIR ".libs/" - -/* Name of package */ -#define PACKAGE "accel-config" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "linux-dsa@lists.01.org" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "accel-config" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "accel-config 3.5.2.gitf6605c41" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "accel-config" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "https://github.com/xxx/accel-config" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "3.5.2.gitf6605c41" - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Enable extensions on AIX 3, Interix. */ -#ifndef _ALL_SOURCE -# define _ALL_SOURCE 1 -#endif -/* Enable GNU extensions on systems that have them. 
*/ -#ifndef _GNU_SOURCE -# define _GNU_SOURCE 1 -#endif -/* Enable threading extensions on Solaris. */ -#ifndef _POSIX_PTHREAD_SEMANTICS -# define _POSIX_PTHREAD_SEMANTICS 1 -#endif -/* Enable extensions on HP NonStop. */ -#ifndef _TANDEM_SOURCE -# define _TANDEM_SOURCE 1 -#endif -/* Enable general extensions on Solaris. */ -#ifndef __EXTENSIONS__ -# define __EXTENSIONS__ 1 -#endif - - -/* Version number of package */ -#define VERSION "3.5.2.gitf6605c41" - -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel). */ -#if defined AC_APPLE_UNIVERSAL_BUILD -# if defined __BIG_ENDIAN__ -# define WORDS_BIGENDIAN 1 -# endif -#else -# ifndef WORDS_BIGENDIAN -/* # undef WORDS_BIGENDIAN */ -# endif -#endif - -/* Enable large inode numbers on Mac OS X 10.5. */ -#ifndef _DARWIN_USE_64_BIT_INODE -# define _DARWIN_USE_64_BIT_INODE 1 -#endif - -/* Number of bits in a file offset, on hosts where this is settable. */ -/* #undef _FILE_OFFSET_BITS */ - -/* Define for large files, on AIX-style hosts. */ -/* #undef _LARGE_FILES */ - -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ - -/* Define to __typeof__ if your compiler spells it that way. 
*/ -/* #undef typeof */ diff --git a/contrib/qpl b/contrib/qpl deleted file mode 160000 index c2ced94c53c..00000000000 --- a/contrib/qpl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c2ced94c53c1ee22191201a59878e9280bc9b9b8 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt deleted file mode 100644 index 89332ae0f7a..00000000000 --- a/contrib/qpl-cmake/CMakeLists.txt +++ /dev/null @@ -1,738 +0,0 @@ -## The Intel® QPL provides high performance implementations of data processing functions for existing hardware accelerator, and/or software path in case if hardware accelerator is not available. -set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake") -set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl") -set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources") -set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl") -set (EFFICIENT_WAIT OFF) -set (LOG_HW_INIT OFF) -set (SANITIZE_MEMORY OFF) -set (SANITIZE_THREADS OFF) -set (LIB_FUZZING_ENGINE OFF) -set (DYNAMIC_LOADING_LIBACCEL_CONFIG OFF) - -function(GetLibraryVersion _content _outputVar) - string(REGEX MATCHALL "QPL VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") - SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE) -endfunction() - -set (QPL_VERSION 1.6.0) - -message(STATUS "Intel QPL version: ${QPL_VERSION}") - -# There are 5 source subdirectories under $QPL_SRC_DIR: c_api, core-iaa, core-sw, middle-layer and isal. -# Generate 8 library targets: qpl_c_api, core_iaa, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, middle_layer_lib, isal and isal_asm, -# which are then combined into static or shared qpl. -# Output ch_contrib::qpl by linking with 8 library targets. - -# Note, QPL has integrated a customized version of ISA-L to meet specific needs. -# This version has been significantly modified and there are no plans to maintain compatibility with the upstream version -# or upgrade the current copy. 
- -## cmake/CompileOptions.cmake and automatic wrappers generation - -# ========================================================================== -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT -# ========================================================================== - -set(QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS "-fno-exceptions;-fno-rtti") - -function(modify_standard_language_flag) - # Declaring function parameters - set(OPTIONS "") - set(ONE_VALUE_ARGS - LANGUAGE_NAME - FLAG_NAME - NEW_FLAG_VALUE) - set(MULTI_VALUE_ARGS "") - - # Parsing function parameters - cmake_parse_arguments(MODIFY - "${OPTIONS}" - "${ONE_VALUE_ARGS}" - "${MULTI_VALUE_ARGS}" - ${ARGN}) - - # Variables - set(FLAG_REGULAR_EXPRESSION "${MODIFY_FLAG_NAME}.*[ ]*") - set(NEW_VALUE "${MODIFY_FLAG_NAME}${MODIFY_NEW_FLAG_VALUE}") - - # Replacing specified flag with new value - string(REGEX REPLACE - ${FLAG_REGULAR_EXPRESSION} ${NEW_VALUE} - NEW_COMPILE_FLAGS - "${CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS}") - - # Returning the value - set(CMAKE_${MODIFY_LANGUAGE_NAME}_FLAGS ${NEW_COMPILE_FLAGS} PARENT_SCOPE) -endfunction() - -function(get_function_name_with_default_bit_width in_function_name bit_width out_function_name) - - if(in_function_name MATCHES ".*_i") - - string(REPLACE "_i" "" in_function_name ${in_function_name}) - - set(${out_function_name} "${in_function_name}_${bit_width}_i" PARENT_SCOPE) - - else() - - set(${out_function_name} "${in_function_name}_${bit_width}" PARENT_SCOPE) - - endif() - -endfunction() - -macro(get_list_of_supported_optimizations PLATFORMS_LIST) - list(APPEND PLATFORMS_LIST "") - list(APPEND PLATFORMS_LIST "px") - list(APPEND PLATFORMS_LIST "avx512") -endmacro(get_list_of_supported_optimizations) - -function(generate_unpack_kernel_arrays current_directory PLATFORMS_LIST) - list(APPEND UNPACK_POSTFIX_LIST "") - list(APPEND UNPACK_PRLE_POSTFIX_LIST "") - list(APPEND PACK_POSTFIX_LIST "") - list(APPEND PACK_INDEX_POSTFIX_LIST "") - list(APPEND 
SCAN_POSTFIX_LIST "") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "") - list(APPEND DEFAULT_BIT_WIDTH_LIST "") - - #create list of functions that use only 8u 16u 32u postfixes - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "unpack_prle") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "extract_i") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "select_i") - list(APPEND DEFAULT_BIT_WIDTH_FUNCTIONS_LIST "expand") - - #create default bit width list - list(APPEND DEFAULT_BIT_WIDTH_LIST "8u") - list(APPEND DEFAULT_BIT_WIDTH_LIST "16u") - list(APPEND DEFAULT_BIT_WIDTH_LIST "32u") - - #create scan kernel postfixes - list(APPEND SCAN_COMPARATOR_LIST "") - - list(APPEND SCAN_COMPARATOR_LIST "eq") - list(APPEND SCAN_COMPARATOR_LIST "ne") - list(APPEND SCAN_COMPARATOR_LIST "lt") - list(APPEND SCAN_COMPARATOR_LIST "le") - list(APPEND SCAN_COMPARATOR_LIST "gt") - list(APPEND SCAN_COMPARATOR_LIST "ge") - list(APPEND SCAN_COMPARATOR_LIST "range") - list(APPEND SCAN_COMPARATOR_LIST "not_range") - - foreach(SCAN_COMPARATOR IN LISTS SCAN_COMPARATOR_LIST) - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_8u") - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_16u8u") - list(APPEND SCAN_POSTFIX_LIST "_${SCAN_COMPARATOR}_32u8u") - endforeach() - - # create unpack kernel postfixes - foreach(input_width RANGE 1 32 1) - if(input_width LESS 8 OR input_width EQUAL 8) - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u8u") - - elseif(input_width LESS 16 OR input_width EQUAL 16) - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u16u") - - else() - list(APPEND UNPACK_POSTFIX_LIST "_${input_width}u32u") - endif() - endforeach() - - # create pack kernel postfixes - foreach(output_width RANGE 1 8 1) - list(APPEND PACK_POSTFIX_LIST "_8u${output_width}u") - endforeach() - - foreach(output_width RANGE 9 16 1) - list(APPEND PACK_POSTFIX_LIST "_16u${output_width}u") - endforeach() 
- - foreach(output_width RANGE 17 32 1) - list(APPEND PACK_POSTFIX_LIST "_32u${output_width}u") - endforeach() - - list(APPEND PACK_POSTFIX_LIST "_8u16u") - list(APPEND PACK_POSTFIX_LIST "_8u32u") - list(APPEND PACK_POSTFIX_LIST "_16u32u") - - # create pack index kernel postfixes - list(APPEND PACK_INDEX_POSTFIX_LIST "_nu") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u16u") - list(APPEND PACK_INDEX_POSTFIX_LIST "_8u32u") - - # write to file - file(MAKE_DIRECTORY ${current_directory}/generated) - - foreach(PLATFORM_VALUE IN LISTS PLATFORMS_LIST) - set(directory "${current_directory}/generated") - set(PLATFORM_PREFIX "${PLATFORM_VALUE}_") - - # - # Write unpack table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "unpack_table_t ${PLATFORM_PREFIX}unpack_table = {\n") - - #write LE kernels - foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST) - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack${UNPACK_POSTFIX},\n") - endforeach() - - #write BE kernels - - #get last element of the list - set(LAST_ELEMENT "") - list(GET UNPACK_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(UNPACK_POSTFIX IN LISTS UNPACK_POSTFIX_LIST) - - if(UNPACK_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "\t${PLATFORM_PREFIX}qplc_unpack_be${UNPACK_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}unpack.cpp "}\n") - - # - # Write pack table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"qplc_api.h\"\n") - 
file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "pack_table_t ${PLATFORM_PREFIX}pack_table = {\n") - - #write LE kernels - foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST) - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack${PACK_POSTFIX},\n") - endforeach() - - #write BE kernels - - #get last element of the list - set(LAST_ELEMENT "") - list(GET PACK_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(PACK_POSTFIX IN LISTS PACK_POSTFIX_LIST) - - if(PACK_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "\t${PLATFORM_PREFIX}qplc_pack_be${PACK_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}pack.cpp "}\n") - - # - # Write scan table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "scan_table_t ${PLATFORM_PREFIX}scan_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST) - - if(SCAN_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}scan.cpp "}\n") - - # - # Write scan_i table - # - file(WRITE 
${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "scan_i_table_t ${PLATFORM_PREFIX}scan_i_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET SCAN_POSTFIX_LIST -1 LAST_ELEMENT) - - foreach(SCAN_POSTFIX IN LISTS SCAN_POSTFIX_LIST) - - if(SCAN_POSTFIX STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "\t${PLATFORM_PREFIX}qplc_scan${SCAN_POSTFIX}_i,\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}scan_i.cpp "}\n") - - # - # Write pack_index table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "pack_index_table_t ${PLATFORM_PREFIX}pack_index_table = {\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_nu,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u32u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_bits_be_nu,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_8u,\n") - 
file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "\t${PLATFORM_PREFIX}qplc_pack_index_be_8u32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}pack_index.cpp "}\n") - - # - # Write default bit width functions - # - foreach(DEAULT_BIT_WIDTH_FUNCTION IN LISTS DEFAULT_BIT_WIDTH_FUNCTIONS_LIST) - file(WRITE ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "${DEAULT_BIT_WIDTH_FUNCTION}_table_t ${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}_table = {\n") - - #get last element of the list - set(LAST_ELEMENT "") - list(GET DEFAULT_BIT_WIDTH_LIST -1 LAST_ELEMENT) - - foreach(BIT_WIDTH IN LISTS DEFAULT_BIT_WIDTH_LIST) - - set(FUNCTION_NAME "") - get_function_name_with_default_bit_width(${DEAULT_BIT_WIDTH_FUNCTION} ${BIT_WIDTH} FUNCTION_NAME) - - if(BIT_WIDTH STREQUAL LAST_ELEMENT) - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME}};\n") - else() - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "\t${PLATFORM_PREFIX}qplc_${FUNCTION_NAME},\n") - endif() - endforeach() - - file(APPEND ${directory}/${PLATFORM_PREFIX}${DEAULT_BIT_WIDTH_FUNCTION}.cpp "}\n") - endforeach() - - # - # Write aggregates table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "namespace qpl::core_sw::dispatcher\n{\n") - 
file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "aggregates_table_t ${PLATFORM_PREFIX}aggregates_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_bit_aggregates_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "\t${PLATFORM_PREFIX}qplc_aggregates_32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}aggregates.cpp "}\n") - - # - # Write mem_copy functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "memory_copy_table_t ${PLATFORM_PREFIX}memory_copy_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_8u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_16u,\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "\t${PLATFORM_PREFIX}qplc_copy_32u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}memory_copy.cpp "}\n") - - # - # Write mem_copy functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "zero_table_t ${PLATFORM_PREFIX}zero_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "\t${PLATFORM_PREFIX}qplc_zero_8u};\n") - - 
file(APPEND ${directory}/${PLATFORM_PREFIX}zero.cpp "}\n") - - # - # Write move functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "move_table_t ${PLATFORM_PREFIX}move_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "\t${PLATFORM_PREFIX}qplc_move_8u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}move.cpp "}\n") - - # - # Write crc64 function table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "crc64_table_t ${PLATFORM_PREFIX}crc64_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "\t${PLATFORM_PREFIX}qplc_crc64};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}crc64.cpp "}\n") - - # - # Write xor_checksum function table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"qplc_api.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "xor_checksum_table_t ${PLATFORM_PREFIX}xor_checksum_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "\t${PLATFORM_PREFIX}qplc_xor_checksum_8u};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}xor_checksum.cpp "}\n") - - # - # Write deflate functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include 
\"deflate_slow_icf.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_hash_table.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"deflate_histogram.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "deflate_table_t ${PLATFORM_PREFIX}deflate_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}slow_deflate_icf_body),\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}deflate_histogram_reset),\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}deflate_hash_table_reset)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate.cpp "}\n") - - # - # Write deflate fix functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"deflate_slow.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "namespace qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "deflate_fix_table_t ${PLATFORM_PREFIX}deflate_fix_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}slow_deflate_body)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}deflate_fix.cpp "}\n") - - # - # Write setup_dictionary functions table - # - file(WRITE ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"deflate_slow_utils.h\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "#include \"dispatcher/dispatcher.hpp\"\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "namespace 
qpl::core_sw::dispatcher\n{\n") - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "setup_dictionary_table_t ${PLATFORM_PREFIX}setup_dictionary_table = {\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "\t reinterpret_cast(&${PLATFORM_PREFIX}setup_dictionary)};\n") - - file(APPEND ${directory}/${PLATFORM_PREFIX}setup_dictionary.cpp "}\n") - - endforeach() -endfunction() - -# [SUBDIR]isal - -enable_language(ASM_NASM) - -set(ISAL_C_SRC ${QPL_SRC_DIR}/isal/igzip/adler32_base.c - ${QPL_SRC_DIR}/isal/igzip/huff_codes.c - ${QPL_SRC_DIR}/isal/igzip/hufftables_c.c - ${QPL_SRC_DIR}/isal/igzip/igzip.c - ${QPL_SRC_DIR}/isal/igzip/igzip_base.c - ${QPL_SRC_DIR}/isal/igzip/flatten_ll.c - ${QPL_SRC_DIR}/isal/igzip/encode_df.c - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_base.c - ${QPL_SRC_DIR}/isal/igzip/igzip_inflate.c - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body.c - ${QPL_SRC_DIR}/isal/crc/crc_base.c - ${QPL_SRC_DIR}/isal/crc/crc64_base.c) - -set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_04.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_gen_icf_map_lh1_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_04.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_finish.asm - ${QPL_SRC_DIR}/isal/igzip/encode_df_04.asm - ${QPL_SRC_DIR}/isal/igzip/encode_df_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_decode_block_stateless_01.asm - ${QPL_SRC_DIR}/isal/igzip/proc_heap.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_body_h1_gr_bt.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_icf_finish.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_inflate_multibinary.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_01.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_update_histogram_04.asm - ${QPL_SRC_DIR}/isal/igzip/rfc1951_lookup.asm - ${QPL_SRC_DIR}/isal/igzip/adler32_sse.asm - ${QPL_SRC_DIR}/isal/igzip/adler32_avx2_4.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_deflate_hash.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_04.asm - 
${QPL_SRC_DIR}/isal/igzip/igzip_set_long_icf_fg_06.asm - ${QPL_SRC_DIR}/isal/igzip/igzip_multibinary.asm - ${QPL_SRC_DIR}/isal/crc/crc_multibinary.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by8_02.asm - ${QPL_SRC_DIR}/isal/crc/crc32_gzip_refl_by16_10.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_01.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_02.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by4.asm - ${QPL_SRC_DIR}/isal/crc/crc32_ieee_by16_10.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_00.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_01.asm - ${QPL_SRC_DIR}/isal/crc/crc32_iscsi_by16_10.asm) - -# Adding ISA-L library target -add_library(isal OBJECT ${ISAL_C_SRC}) -add_library(isal_asm OBJECT ${ISAL_ASM_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -# Setting external and internal interfaces for ISA-L library -target_include_directories(isal - PUBLIC $ - PUBLIC ${QPL_SRC_DIR}/isal/igzip) - -set_target_properties(isal PROPERTIES - CXX_STANDARD 11 - C_STANDARD 99) - -# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available". -# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system. -target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/" - PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/" - PRIVATE "-I${QPL_SRC_DIR}/isal/crc/" - PRIVATE "-DHAVE_AS_KNOWS_AVX512" - PRIVATE "-DAS_FEATURE_LEVEL=10" - PRIVATE "-DQPL_LIB") - -# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. 
-# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" -if (SANITIZE STREQUAL "undefined") - get_target_property(target_options isal_asm COMPILE_OPTIONS) - list(REMOVE_ITEM target_options "-fno-sanitize=undefined") - set_property(TARGET isal_asm PROPERTY COMPILE_OPTIONS ${target_options}) -endif() - -target_compile_definitions(isal PUBLIC - QPL_LIB - NDEBUG) - -# [SUBDIR]core-sw -# Create set of libraries corresponding to supported platforms for SW fallback which are implemented by AVX512 and non-AVX512 instructions respectively. -# The upper level QPL API will check SIMD capabilities of the target system at runtime and decide to call AVX512 function or non-AVX512 function. -# Hence, here we don't need put ENABLE_AVX512 CMake switch. - -get_list_of_supported_optimizations(PLATFORMS_LIST) - -foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) - # Find Core Sources - file(GLOB SOURCES - ${QPL_SRC_DIR}/core-sw/src/checksums/*.c - ${QPL_SRC_DIR}/core-sw/src/filtering/*.c - ${QPL_SRC_DIR}/core-sw/src/other/*.c - ${QPL_SRC_DIR}/core-sw/src/compression/*.c) - - file(GLOB DATA_SOURCES - ${QPL_SRC_DIR}/core-sw/src/data/*.c) - - # Create library - add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES}) - - set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - - target_include_directories(qplcore_${PLATFORM_ID} - PUBLIC $ - PUBLIC $ - PUBLIC $ - PUBLIC $ - PRIVATE $) - - # Set specific compiler options and/or definitions based on a platform - if (${PLATFORM_ID} MATCHES "avx512") - target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2) - target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512) - else() # Create default px library - target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0) - endif() - - target_link_libraries(qplcore_${PLATFORM_ID} isal) -endforeach() - -# -# Create dispatcher between platforms and auto-generated wrappers -# -file(GLOB SW_DISPATCHER_SOURCES 
${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp) - -add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_include_directories(qplcore_sw_dispatcher - PUBLIC $) - -# Generate kernel wrappers -generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}") - -foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST) - file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp) - - target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC}) - - # Set specific compiler options and/or definitions based on a platform - if (${PLATFORM_ID} MATCHES "avx512") - set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2) - else() - set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0) - endif() - - target_include_directories(qplcore_sw_dispatcher - PUBLIC $) -endforeach() - -set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17) - -# w/a for build compatibility with ISAL codebase -target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB) - -target_compile_options(qplcore_sw_dispatcher - PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}) - -# [SUBDIR]core-iaa -file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c - ${QPL_SRC_DIR}/core-iaa/sources/driver_loader/*.c - ${QPL_SRC_DIR}/core-iaa/sources/descriptors/*.c - ${QPL_SRC_DIR}/core-iaa/sources/*.c) - -# Create library -add_library(core_iaa OBJECT ${HW_PATH_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_include_directories(core_iaa - PRIVATE ${UUID_DIR} - PUBLIC $ - PUBLIC $ - PRIVATE $ # status.h in own_checkers.h - PRIVATE $ # for own_checkers.h - PRIVATE $) - -target_compile_features(core_iaa PRIVATE c_std_11) - -target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK - PRIVATE $<$:LOG_HW_INIT> - PRIVATE 
$<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) - -# [SUBDIR]middle-layer -file(GLOB MIDDLE_LAYER_SRC - ${QPL_SRC_DIR}/middle-layer/accelerator/*.cpp - ${QPL_SRC_DIR}/middle-layer/analytics/*.cpp - ${QPL_SRC_DIR}/middle-layer/common/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*/*.cpp - ${QPL_SRC_DIR}/middle-layer/compression/*/*/*.cpp - ${QPL_SRC_DIR}/middle-layer/dispatcher/*.cpp - ${QPL_SRC_DIR}/middle-layer/other/*.cpp - ${QPL_SRC_DIR}/middle-layer/util/*.cpp) - -add_library(middle_layer_lib OBJECT - ${MIDDLE_LAYER_SRC}) - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -target_compile_options(middle_layer_lib - PRIVATE $<$:$<$:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>> - PRIVATE ${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}) - -target_compile_definitions(middle_layer_lib - PUBLIC QPL_VERSION="${QPL_VERSION}" - PUBLIC $<$:LOG_HW_INIT> - PUBLIC $<$:QPL_EFFICIENT_WAIT> - PUBLIC QPL_BADARG_CHECK - PUBLIC $<$:DYNAMIC_LOADING_LIBACCEL_CONFIG>) - -set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17) - -target_include_directories(middle_layer_lib - PRIVATE ${UUID_DIR} - PUBLIC $ - PUBLIC $ - PRIVATE $ - PUBLIC $ - PUBLIC $ - PUBLIC $) - -target_compile_definitions(middle_layer_lib PUBLIC -DQPL_LIB) - -# [SUBDIR]c_api -file(GLOB QPL_C_API_SRC - ${QPL_SRC_DIR}/c_api/compression_operations/*.c - ${QPL_SRC_DIR}/c_api/compression_operations/*.cpp - ${QPL_SRC_DIR}/c_api/filter_operations/*.cpp - ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.c - ${QPL_SRC_DIR}/c_api/legacy_hw_path/*.cpp - ${QPL_SRC_DIR}/c_api/other_operations/*.cpp - ${QPL_SRC_DIR}/c_api/serialization/*.cpp - ${QPL_SRC_DIR}/c_api/*.cpp) - -add_library(qpl_c_api OBJECT ${QPL_C_API_SRC}) - -target_include_directories(qpl_c_api - PUBLIC $ - PUBLIC $ $ - PRIVATE $) - -set_target_properties(qpl_c_api PROPERTIES - $<$:C_STANDARD 17 - CXX_STANDARD 17) - -target_compile_options(qpl_c_api - PRIVATE $<$:$<$:-O3;-U_FORTIFY_SOURCE;-D_FORTIFY_SOURCE=2>> - PRIVATE 
$<$:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>) - -target_compile_definitions(qpl_c_api - PUBLIC -DQPL_BADARG_CHECK # own_checkers.h - PUBLIC -DQPL_LIB # needed for middle_layer_lib - PUBLIC $<$:LOG_HW_INIT>) # needed for middle_layer_lib - -set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS - $) - -# Final _qpl target - -get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS) - -add_library(_qpl STATIC ${LIB_DEPS}) - -target_include_directories(_qpl - PUBLIC $ $) - -target_link_libraries(_qpl - PRIVATE ch_contrib::accel-config) - -target_include_directories(_qpl SYSTEM BEFORE - PUBLIC "${QPL_PROJECT_DIR}/include" - PUBLIC ${UUID_DIR}) - -add_library (ch_contrib::qpl ALIAS _qpl) diff --git a/contrib/qpl-cmake/uuid/uuid.h b/contrib/qpl-cmake/uuid/uuid.h deleted file mode 100644 index bf108ba0d29..00000000000 --- a/contrib/qpl-cmake/uuid/uuid.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _QPL_UUID_UUID_H -#define _QPL_UUID_UUID_H -typedef unsigned char uuid_t[16]; -#endif /* _QPL_UUID_UUID_H */ diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt index 4802623abd6..bb0c4d001e6 100644 --- a/docker/test/integration/runner/requirements.txt +++ b/docker/test/integration/runner/requirements.txt @@ -23,6 +23,7 @@ charset-normalizer==3.3.2 click==8.1.7 confluent-kafka==2.3.0 cryptography==42.0.0 +datacompy==0.7.3 dbus-python==1.2.18 delta-spark==2.3.0 deltalake==0.16.0 @@ -60,6 +61,7 @@ oauthlib==3.2.0 packaging==24.0 paramiko==3.4.0 pika==1.2.0 +pandas==2.2.3 pip==24.1.1 pluggy==1.5.0 protobuf==4.25.2 diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index cdc1d1fa095..fa6b087eb7d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ libxml2-utils \ locales \ moreutils \ + ripgrep \ python3-pip \ yamllint \ zstd \ diff --git 
a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md deleted file mode 100644 index b9d39b8cc2d..00000000000 --- a/docs/en/development/building_and_benchmarking_deflate_qpl.md +++ /dev/null @@ -1,327 +0,0 @@ ---- -slug: /en/development/building_and_benchmarking_deflate_qpl -sidebar_position: 73 -sidebar_label: Building and Benchmarking DEFLATE_QPL -description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec ---- - -# Build Clickhouse with DEFLATE_QPL - -- Make sure your host machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites) -- deflate_qpl is enabled by default during cmake build. In case you accidentally change it, please double-check build flag: ENABLE_QPL=1 - -- For generic requirements, please refer to Clickhouse generic [build instructions](/docs/en/development/build.md) - -# Run Benchmark with DEFLATE_QPL - -## Files list - -The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) give example to run benchmark with python scripts: - -`client_scripts` contains python scripts for running typical benchmark, for example: -- `client_stressing_test.py`: The python script for query stress test with [1~4] server instances. -- `queries_ssb.sql`: The file lists all queries for [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema/) -- `allin1_ssb.sh`: This shell script executes benchmark workflow all in one automatically. - -`database_files` means it will store database files according to lz4/deflate/zstd codec. 
- -## Run benchmark automatically for Star Schema: - -``` bash -$ cd ./benchmark_sample/client_scripts -$ sh run_ssb.sh -``` - -After complete, please check all the results in this folder:`./output/` - -In case you run into failure, please manually run benchmark as below sections. - -## Definition - -[CLICKHOUSE_EXE] means the path of clickhouse executable program. - -## Environment - -- CPU: Sapphire Rapid -- OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements) -- IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) -- Install python modules: - -``` bash -pip3 install clickhouse_driver numpy -``` - -[Self-check for IAA] - -``` bash -$ accel-config list | grep -P 'iax|state' -``` - -Expected output like this: -``` bash - "dev":"iax1", - "state":"enabled", - "state":"enabled", -``` - -If you see nothing output, it means IAA is not ready to work. Please check IAA setup again. - -## Generate raw data - -``` bash -$ cd ./benchmark_sample -$ mkdir rawdata_dir && cd rawdata_dir -``` - -Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows data with the parameters: --s 20 - -The files like `*.tbl` are expected to output under `./benchmark_sample/rawdata_dir/ssb-dbgen`: - -## Database setup - -Set up database with LZ4 codec - -``` bash -$ cd ./database_dir/lz4 -$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` - -Here you should see the message `Connected to ClickHouse server` from console which means client successfully setup connection with server. - -Complete below three steps mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) -- Creating tables in ClickHouse -- Inserting data. 
Here should use `./benchmark_sample/rawdata_dir/ssb-dbgen/*.tbl` as input data. -- Converting “star schema” to de-normalized “flat schema” - -Set up database with IAA Deflate codec - -``` bash -$ cd ./database_dir/deflate -$ [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` -Complete three steps same as lz4 above - -Set up database with ZSTD codec - -``` bash -$ cd ./database_dir/zstd -$ [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ [CLICKHOUSE_EXE] client -``` -Complete three steps same as lz4 above - -[self-check] -For each codec(lz4/zstd/deflate), please execute below query to make sure the databases are created successfully: -```sql -select count() from lineorder_flat -``` -You are expected to see below output: -```sql -┌───count()─┐ -│ 119994608 │ -└───────────┘ -``` -[Self-check for IAA Deflate codec] - -At the first time you execute insertion or query from client, clickhouse server console is expected to print this log: -```text -Hardware-assisted DeflateQpl codec is ready! -``` -If you never find this, but see another log as below: -```text -Initialization of hardware-assisted DeflateQpl codec failed -``` -That means IAA devices is not ready, you need check IAA setup again. - -## Benchmark with single instance - -- Before start benchmark, Please disable C6 and set CPU frequency governor to be `performance` - -``` bash -$ cpupower idle-set -d 3 -$ cpupower frequency-set -g performance -``` - -- To eliminate impact of memory bound on cross sockets, we use `numactl` to bind server on one socket and client on another socket. 
-- Single instance means single server connected with single client - -Now run benchmark for LZ4/Deflate/ZSTD respectively: - -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log -``` - -IAA deflate: - -``` bash -$ cd ./database_dir/deflate -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > zstd.log -``` - -Now three logs should be output as expected: -```text -lz4.log -deflate.log -zstd.log -``` - -How to check performance metrics: - -We focus on QPS, please search the keyword: `QPS_Final` and collect statistics - -## Benchmark with multi-instances - -- To reduce impact of memory bound on too much threads, We recommend run benchmark with multi-instances. -- Multi-instance means multiple(2 or 4)servers connected with respective client. -- The cores of one socket need to be divided equally and assigned to the servers respectively. -- For multi-instances, must create new folder for each codec and insert dataset by following the similar steps as single instance. - -There are 2 differences: -- For client side, you need launch clickhouse with the assigned port during table creation and data insertion. -- For server side, you need launch clickhouse with the specific xml config file in which port has been assigned. All customized xml config files for multi-instances has been provided under ./server_config. - -Here we assume there are 60 cores per socket and take 2 instances for example. 
-Launch server for first instance -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -``` - -IAA Deflate: - -``` bash -$ cd ./database_dir/deflate -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -``` - -[Launch server for second instance] - -LZ4: - -``` bash -$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2 -$ cp ../../server_config/config_lz4_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& -``` - -ZSTD: - -``` bash -$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2 -$ cp ../../server_config/config_zstd_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& -``` - -IAA Deflate: - -``` bash -$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2 -$ cp ../../server_config/config_deflate_s2.xml ./ -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& -``` - -Creating tables && Inserting data for second instance - -Creating tables: - -``` bash -$ [CLICKHOUSE_EXE] client -m --port=9001 -``` - -Inserting data: - -``` bash -$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001 -``` - -- [TBL_FILE_NAME] represents the name of a file named with the regular expression: *. tbl under `./benchmark_sample/rawdata_dir/ssb-dbgen`. -- `--port=9001` stands for the assigned port for server instance which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, you need replace it with the value: 9002/9003 which stand for s3/s4 instance respectively. If you don't assign it, the port is 9000 by default which has been used by first instance. 
- -Benchmarking with 2 instances - -LZ4: - -``` bash -$ cd ./database_dir/lz4 -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null& -$ cd ./database_dir/lz4_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log -``` - -ZSTD: - -``` bash -$ cd ./database_dir/zstd -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null& -$ cd ./database_dir/zstd_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log -``` - -IAA deflate - -``` bash -$ cd ./database_dir/deflate -$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null& -$ cd ./database_dir/deflate_s2 -$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/dev/null& -$ cd ./client_scripts -$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log -``` - -Here the last argument: `2` of client_stressing_test.py stands for the number of instances. For more instances, you need replace it with the value: 3 or 4. This script support up to 4 instances/ - -Now three logs should be output as expected: - -``` text -lz4_2insts.log -deflate_2insts.log -zstd_2insts.log -``` -How to check performance metrics: - -We focus on QPS, please search the keyword: `QPS_Final` and collect statistics - -Benchmark setup for 4 instances is similar with 2 instances above. -We recommend use 2 instances benchmark data as final report for review. 
- -## Tips - -Each time before launch new clickhouse server, please make sure no background clickhouse process running, please check and kill old one: - -``` bash -$ ps -aux| grep clickhouse -$ kill -9 [PID] -``` -By comparing the query list in ./client_scripts/queries_ssb.sql with official [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema), you will find 3 queries are not included: Q1.2/Q1.3/Q3.4 . This is because cpu utilization% is very low <10% for these queries which means cannot demonstrate performance differences. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index c49492c1cb4..aac322f05eb 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -18,7 +18,7 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li Note that the listed libraries are the ones located in the `contrib/` directory of the ClickHouse repository. Depending on the build options, some of the libraries may have not been compiled, and, as a result, their functionality may not be available at runtime. -[Example](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Example](https://sql.clickhouse.com?query_id=478GCPU7LRTSZJBNY3EJT3) ## Adding and maintaining third-party libraries diff --git a/docs/en/engines/table-engines/integrations/azure-queue.md b/docs/en/engines/table-engines/integrations/azure-queue.md index b5259336a8b..2e5889c7485 100644 --- a/docs/en/engines/table-engines/integrations/azure-queue.md +++ b/docs/en/engines/table-engines/integrations/azure-queue.md @@ -36,6 +36,7 @@ SETTINGS ## Settings {#settings} The set of supported settings is the same as for `S3Queue` table engine, but without `s3queue_` prefix. See [full list of settings settings](../../../engines/table-engines/integrations/s3queue.md#settings). 
+To get a list of settings, configured for the table, use `system.s3_queue_settings` table. Available from `24.10`. ## Description {#description} diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 1916c33272e..11fc357d222 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -69,6 +69,8 @@ SETTINGS ## Settings {#settings} +To get a list of settings, configured for the table, use `system.s3_queue_settings` table. Available from `24.10`. + ### mode {#mode} Possible values: diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 7a449f400fd..819038ee32c 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -37,7 +37,7 @@ For a description of request parameters, see [request description](../../../sql- **Query clauses** -When creating an `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. +When creating an `AggregatingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
@@ -62,19 +62,19 @@ All of the parameters have the same meaning as in `MergeTree`. ## SELECT and INSERT {#select-and-insert} To insert data, use [INSERT SELECT](../../../sql-reference/statements/insert-into.md) query with aggregate -State- functions. -When selecting data from `AggregatingMergeTree` table, use `GROUP BY` clause and the same aggregate functions as when inserting data, but using `-Merge` suffix. +When selecting data from `AggregatingMergeTree` table, use `GROUP BY` clause and the same aggregate functions as when inserting data, but using the `-Merge` suffix. -In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. If dump data into, for example, `TabSeparated` format with `SELECT` query then this dump can be loaded back using `INSERT` query. +In the results of `SELECT` query, the values of `AggregateFunction` type have implementation-specific binary representation for all of the ClickHouse output formats. For example, if you dump data into `TabSeparated` format with a `SELECT` query, then this dump can be loaded back using an `INSERT` query. ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} -The following examples assumes that you have a database named `test` so make sure you create that if it doesn't already exist: +The following example assumes that you have a database named `test`, so create it if it doesn't already exist: ```sql CREATE DATABASE test; ``` -We will create the table `test.visits` that contain the raw data: +Now create the table `test.visits` that contains the raw data: ``` sql CREATE TABLE test.visits @@ -86,9 +86,9 @@ CREATE TABLE test.visits ) ENGINE = MergeTree ORDER BY (StartDate, CounterID); ``` -Next, we need to create an `AggregatingMergeTree` table that will store `AggregationFunction`s that keep track of the total number of visits and the number of unique users. 
+Next, you need an `AggregatingMergeTree` table that will store `AggregateFunction`s that keep track of the total number of visits and the number of unique users. -`AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: +Create the `AggregatingMergeTree` table `test.agg_visits`, which uses the `AggregateFunction` type and will be populated by a materialized view that watches the `test.visits` table: ``` sql CREATE TABLE test.agg_visits ( @@ -100,7 +100,7 @@ CREATE TABLE test.agg_visits ( ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID); ``` -And then let's create a materialized view that populates `test.agg_visits` from `test.visits` : +Create a materialized view that populates `test.agg_visits` from `test.visits`: ```sql CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits @@ -113,7 +113,7 @@ FROM test.visits GROUP BY StartDate, CounterID; ``` -Inserting data into the `test.visits` table. +Insert data into the `test.visits` table: ``` sql INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) @@ -122,7 +122,7 @@ INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) The data is inserted in both `test.visits` and `test.agg_visits`. -To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: +To get the aggregated data, execute a query such as `SELECT ... 
GROUP BY ...` from the materialized view `test.visits_mv`: ```sql SELECT @@ -140,14 +140,14 @@ ORDER BY StartDate; └─────────────────────────┴────────┴───────┘ ``` -And how about if we add another couple of records to `test.visits`, but this time we'll use a different timestamp for one of the records: +Add another couple of records to `test.visits`, but this time try using a different timestamp for one of the records: ```sql INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5); ``` -If we then run the `SELECT` query again, we'll see the following output: +Run the `SELECT` query again, which will return the following output: ```text ┌───────────────StartDate─┬─Visits─┬─Users─┐ diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index 3fbbe2376e8..6233a7e80ad 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -453,4 +453,4 @@ ORDER BY yr, mo; ``` -The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), 
[example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). +The data is also available for interactive queries in the [Playground](https://sql.clickhouse.com), [example](https://sql.clickhouse.com?query_id=1MXMHASDLEQIP4P1D1STND). diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 94fa6998f5d..ecfd21e9d2c 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -360,9 +360,9 @@ This screenshot shows cell tower locations with LTE, UMTS, and GSM radios. The ![Dashboard of cell towers by radio type in mcc 204](@site/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png) :::tip -The data is also available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play). +The data is also available for interactive queries in the [Playground](https://sql.clickhouse.com). 
-This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you. +This [example](https://sql.clickhouse.com?query_id=UV8M4MAGS2PWAUOAYAAARM) will populate the username and even the query for you. Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number). ::: diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index e5ffb15bb9a..26a91eee34d 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -244,13 +244,13 @@ FROM s3('https://datasets-documentation.s3.amazonaws.com/github/commits/clickhou The tool suggests several queries via its help output. We have answered these in addition to some additional supplementary questions of interest. These queries are of approximately increasing complexity vs. the tool's arbitrary order. -This dataset is available in [play.clickhouse.com](https://play.clickhouse.com/play?user=play#U0hPVyBUQUJMRVMgSU4gZ2l0X2NsaWNraG91c2U=) in the `git_clickhouse` databases. We provide a link to this environment for all queries, adapting the database name as required. Note that play results may vary from the those presented here due to differences in time of data collection. +This dataset is available in [sql.clickhouse.com](https://sql.clickhouse.com?query_id=DCQPNPAIMAQXRLHYURLKVJ) in the `git_clickhouse` database. We provide a link to this environment for all queries, adapting the database name as required. Note that play results may vary from those presented here due to differences in the time of data collection. ## History of a single file The simplest of queries. Here we look at all commit messages for the `StorageReplicatedMergeTree.cpp`. 
Since these are likely more interesting, we sort by the most recent messages first. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB0aW1lLAogICAgc3Vic3RyaW5nKGNvbW1pdF9oYXNoLCAxLCAxMSkgQVMgY29tbWl0LAogICAgY2hhbmdlX3R5cGUsCiAgICBhdXRob3IsCiAgICBwYXRoLAogICAgb2xkX3BhdGgsCiAgICBsaW5lc19hZGRlZCwKICAgIGxpbmVzX2RlbGV0ZWQsCiAgICBjb21taXRfbWVzc2FnZQpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSBwYXRoID0gJ3NyYy9TdG9yYWdlcy9TdG9yYWdlUmVwbGljYXRlZE1lcmdlVHJlZS5jcHAnCk9SREVSIEJZIHRpbWUgREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=COAZRFX2YFULDBXRQTCQ1S) ```sql SELECT @@ -287,7 +287,7 @@ LIMIT 10 We can also review the line changes, excluding renames i.e. we won't show changes before a rename event when the file existed under a different name: -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB0aW1lLAogICAgc3Vic3RyaW5nKGNvbW1pdF9oYXNoLCAxLCAxMSkgQVMgY29tbWl0LAogICAgc2lnbiwKICAgIGxpbmVfbnVtYmVyX29sZCwKICAgIGxpbmVfbnVtYmVyX25ldywKICAgIGF1dGhvciwKICAgIGxpbmUKRlJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKV0hFUkUgcGF0aCA9ICdzcmMvU3RvcmFnZXMvU3RvcmFnZVJlcGxpY2F0ZWRNZXJnZVRyZWUuY3BwJwpPUkRFUiBCWSBsaW5lX251bWJlcl9uZXcgQVNDCkxJTUlUIDEw) +[play](https://sql.clickhouse.com?query_id=AKS9SYLARFMZCHGAAQNEBN) ```sql SELECT @@ -327,7 +327,7 @@ This is important for later analysis when we only want to consider the current f **Note there appears to have been a broken commit history in relation to files under the `dbms`, `libs`, `tests/testflows/` directories during their renames. 
We also thus exclude these.** -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUIHBhdGgKRlJPTQooCiAgICBTRUxFQ1QKICAgICAgICBvbGRfcGF0aCBBUyBwYXRoLAogICAgICAgIG1heCh0aW1lKSBBUyBsYXN0X3RpbWUsCiAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgIEdST1VQIEJZIG9sZF9wYXRoCiAgICBVTklPTiBBTEwKICAgIFNFTEVDVAogICAgICAgIHBhdGgsCiAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICBhcmdNYXgoY2hhbmdlX3R5cGUsIHRpbWUpIEFTIGNoYW5nZV90eXBlCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgR1JPVVAgQlkgcGF0aAopCkdST1VQIEJZIHBhdGgKSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIE5PVCBtYXRjaChwYXRoLCAnKF5kYm1zLyl8KF5saWJzLyl8KF50ZXN0cy90ZXN0Zmxvd3MvKXwoXnByb2dyYW1zL3NlcnZlci9zdG9yZS8pJykgT1JERVIgQlkgcGF0aApMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=2HNFWPCFWEEY92WTAPMA7W) ```sql SELECT path @@ -369,7 +369,7 @@ LIMIT 10 Note that this allows for files to be renamed and then re-renamed to their original values. First we aggregate `old_path` for a list of deleted files as a result of renaming. We union this with the last operation for every `path`. Finally, we filter this list to those where the final event is not a `Delete`. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUIHVuaXEocGF0aCkKRlJPTQooCiAgICBTRUxFQ1QgcGF0aAogICAgRlJPTQogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBvbGRfcGF0aCBBUyBwYXRoLAogICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAyIEFTIGNoYW5nZV90eXBlCiAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgIFVOSU9OIEFMTAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICBhcmdNYXgoY2hhbmdlX3R5cGUsIHRpbWUpIEFTIGNoYW5nZV90eXBlCiAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICApCiAgICBHUk9VUCBCWSBwYXRoCiAgICBIQVZJTkcgKGFyZ01heChjaGFuZ2VfdHlwZSwgbGFzdF90aW1lKSAhPSAyKSBBTkQgTk9UIG1hdGNoKHBhdGgsICcoXmRibXMvKXwoXmxpYnMvKXwoXnRlc3RzL3Rlc3RmbG93cy8pfChecHJvZ3JhbXMvc2VydmVyL3N0b3JlLyknKSBPUkRFUiBCWSBwYXRoCikK) +[play](https://sql.clickhouse.com?query_id=1OXCKMOH2JVMSHD3NS2WW6) ```sql SELECT uniq(path) @@ -419,7 +419,7 @@ The difference here is caused by a few factors: - A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICAgIGNoYW5nZV90eXBlLAogICAgICBwYXRoLAogICAgICBvbGRfcGF0aCwKICAgICAgdGltZSwKICAgICAgY29tbWl0X2hhc2gKICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogIFdIRVJFIChwYXRoID0gJ3NyYy9GdW5jdGlvbnMvZ2VvbWV0cnlGcm9tQ29sdW1uLmgnKSBPUiAob2xkX3BhdGggPSAnc3JjL0Z1bmN0aW9ucy9nZW9tZXRyeUZyb21Db2x1bW4uaCcpCg==) +[play](https://sql.clickhouse.com?query_id=SCXWMR9GBMJ9UNZYQXQBFA) ```sql SELECT @@ -454,7 +454,7 @@ These differences shouldn't meaningfully impact our analysis. **We welcome impro Limiting to current files, we consider the number of modifications to be the sum of deletes and additions. -[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIHN1bShsaW5lc19hZGRlZCkgKyBzdW0obGluZXNfZGVsZXRlZCkgQVMgbW9kaWZpY2F0aW9ucwpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSAocGF0aCBJTiAoY3VycmVudF9maWxlcykpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgcGF0aApPUkRFUiBCWSBtb2RpZmljYXRpb25zIERFU0MKTElNSVQgMTA=) 
+[play](https://sql.clickhouse.com?query_id=MHXPSBNPTDMJYR3OYSXVR7) ```sql WITH current_files AS @@ -507,7 +507,7 @@ LIMIT 10 ## What day of the week do commits usually occur? -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXlfb2Zfd2VlaywKICAgIGNvdW50KCkgQVMgYwpGUk9NIGdpdF9jbGlja2hvdXNlLmNvbW1pdHMKR1JPVVAgQlkgZGF5T2ZXZWVrKHRpbWUpIEFTIGRheV9vZl93ZWVrCg==) +[play](https://sql.clickhouse.com?query_id=GED2STFSYJDRAA59H8RLIV) ```sql SELECT @@ -534,7 +534,7 @@ This makes sense with some productivity drop-off on Fridays. Great to see people This would produce a large query result that is unrealistic to show or visualize if unfiltered. We, therefore, allow a file or subdirectory to be filtered in the following example. Here we group by week using the `toStartOfWeek` function - adapt as required. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB3ZWVrLAogICAgc3VtKGxpbmVzX2FkZGVkKSBBUyBsaW5lc19hZGRlZCwKICAgIHN1bShsaW5lc19kZWxldGVkKSBBUyBsaW5lc19kZWxldGVkLAogICAgdW5pcShjb21taXRfaGFzaCkgQVMgbnVtX2NvbW1pdHMsCiAgICB1bmlxKGF1dGhvcikgQVMgYXV0aG9ycwpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSBwYXRoIExJS0UgJ3NyYy9TdG9yYWdlcyUnCkdST1VQIEJZIHRvU3RhcnRPZldlZWsodGltZSkgQVMgd2VlawpPUkRFUiBCWSB3ZWVrIEFTQwpMSU1JVCAxMAo=) +[play](https://sql.clickhouse.com?query_id=REZRXDVU7CAWT5WKNJSTNY) ```sql SELECT @@ -578,7 +578,7 @@ This data visualizes well. Below we use Superset. Limit to current files only. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIHVuaXEoYXV0aG9yKSBBUyBudW1fYXV0aG9ycwpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSBwYXRoIElOIChjdXJyZW50X2ZpbGVzKQpHUk9VUCBCWSBwYXRoCk9SREVSIEJZIG51bV9hdXRob3JzIERFU0MKTElNSVQgMTA=) +[play](https://sql.clickhouse.com?query_id=CYQFNQNK9TAMPU2OZ8KG5Y) ```sql WITH current_files AS @@ -633,7 +633,7 @@ LIMIT 10 Limited to current files only. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgYW55KHBhdGgpIEFTIGZpbGVfcGF0aCwKICAgIGxpbmUsCiAgICBtYXgodGltZSkgQVMgbGF0ZXN0X2NoYW5nZSwKICAgIGFueShmaWxlX2NoYW5nZV90eXBlKQpGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwpXSEVSRSBwYXRoIElOIChjdXJyZW50X2ZpbGVzKQpHUk9VUCBCWSBsaW5lCk9SREVSIEJZIGxhdGVzdF9jaGFuZ2UgQVNDCkxJTUlUIDEw) +[play](https://sql.clickhouse.com?query_id=VWPBPGRZVGTHOCQYWNQZNT) ```sql WITH current_files AS @@ -690,7 +690,7 @@ LIMIT 10 Limited to current files only. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgY291bnQoKSBBUyBjLAogICAgcGF0aCwKICAgIG1heCh0aW1lKSBBUyBsYXRlc3RfY2hhbmdlCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIHBhdGggSU4gKGN1cnJlbnRfZmlsZXMpCkdST1VQIEJZIHBhdGgKT1JERVIgQlkgYyBERVNDCkxJTUlUIDEw) +[play](https://sql.clickhouse.com?query_id=VWPBPGRZVGTHOCQYWNQZNT) ```sql WITH current_files AS @@ -750,7 +750,7 @@ Our core data structure, the Merge Tree, is obviously under constant evolution w Do we write more docs at certain times of the month e.g., around release dates? We can use the `countIf` function to compute a simple ratio, visualizing the result using the `bar` function. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXksCiAgICBiYXIoZG9jc19yYXRpbyAqIDEwMDAsIDAsIDEwMCwgMTAwKSBBUyBiYXIKRlJPTQooCiAgICBTRUxFQ1QKICAgICAgICBkYXksCiAgICAgICAgY291bnRJZihmaWxlX2V4dGVuc2lvbiBJTiAoJ2gnLCAnY3BwJywgJ3NxbCcpKSBBUyBjb2RlLAogICAgICAgIGNvdW50SWYoZmlsZV9leHRlbnNpb24gPSAnbWQnKSBBUyBkb2NzLAogICAgICAgIGRvY3MgLyAoY29kZSArIGRvY3MpIEFTIGRvY3NfcmF0aW8KICAgIEZST00gZ2l0X2NsaWNraG91c2UubGluZV9jaGFuZ2VzCiAgICBXSEVSRSAoc2lnbiA9IDEpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnLCAnbWQnKSkKICAgIEdST1VQIEJZIGRheU9mTW9udGgodGltZSkgQVMgZGF5CikK) +[play](https://sql.clickhouse.com?query_id=BA4RZUXUHNQBH9YK7F2T9J) ```sql SELECT @@ -811,7 +811,7 @@ Maybe a little more near the end of the month, but overall we keep a good even d We consider diversity here to be the number of unique files an author has contributed to. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICB1bmlxKHBhdGgpIEFTIG51bV9maWxlcwpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSAoY2hhbmdlX3R5cGUgSU4gKCdBZGQnLCAnTW9kaWZ5JykpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYXV0aG9yCk9SREVSIEJZIG51bV9maWxlcyBERVNDCkxJTUlUIDEw) +[play](https://sql.clickhouse.com?query_id=MT8WBABUKYBYSBA78W5TML) ```sql SELECT @@ -841,7 +841,7 @@ LIMIT 10 Let's see who has the most diverse commits in their recent work. 
Rather than limit by date, we'll restrict to an author's last N commits (in this case, we've used 3 but feel free to modify): -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBzdW0obnVtX2ZpbGVzX2NvbW1pdCkgQVMgbnVtX2ZpbGVzCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIHVuaXEocGF0aCkgQVMgbnVtX2ZpbGVzX2NvbW1pdCwKICAgICAgICBtYXgodGltZSkgQVMgY29tbWl0X3RpbWUKICAgIEZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCiAgICBXSEVSRSAoY2hhbmdlX3R5cGUgSU4gKCdBZGQnLCAnTW9kaWZ5JykpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKICAgIEdST1VQIEJZCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoCiAgICBPUkRFUiBCWQogICAgICAgIGF1dGhvciBBU0MsCiAgICAgICAgY29tbWl0X3RpbWUgREVTQwogICAgTElNSVQgMyBCWSBhdXRob3IKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgbnVtX2ZpbGVzIERFU0MKTElNSVQgMTA=) +[play](https://sql.clickhouse.com?query_id=4Q3D67FWRIVWTY8EIDDE5U) ```sql SELECT @@ -888,7 +888,7 @@ LIMIT 10 Here we select our founder [Alexey Milovidov](https://github.com/alexey-milovidov) and limit our analysis to current files. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIGNvdW50KCkgQVMgYwpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSAoYXV0aG9yID0gJ0FsZXhleSBNaWxvdmlkb3YnKSBBTkQgKHBhdGggSU4gKGN1cnJlbnRfZmlsZXMpKQpHUk9VUCBCWSBwYXRoCk9SREVSIEJZIGMgREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=OKGZBACRHVGCRAGCZAJKMF) ```sql WITH current_files AS @@ -941,7 +941,7 @@ LIMIT 10 This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=) +[play](https://sql.clickhouse.com?query_id=P9PBDZGOSVTKXEXU73ZNAJ) ```sql SELECT @@ -976,7 +976,7 @@ For this, we first need to identify the largest files. Estimating this via a ful To estimate, assuming we restrict to current files, we sum line additions and subtract deletions. We can then compute a ratio of length to the number of authors. -[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIHN1bShsaW5lc19hZGRlZCkgLSBzdW0obGluZXNfZGVsZXRlZCkgQVMgbnVtX2xpbmVzLAogICAgdW5pcUV4YWN0KGF1dGhvcikgQVMgbnVtX2F1dGhvcnMsCiAgICBudW1fbGluZXMgLyBudW1fYXV0aG9ycyBBUyBsaW5lc19hdXRob3JfcmF0aW8KRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKV0hFUkUgcGF0aCBJTiAoY3VycmVudF9maWxlcykKR1JPVVAgQlkgcGF0aApPUkRFUiBCWSBsaW5
lc19hdXRob3JfcmF0aW8gREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=PVSDOHZYUMRDDUZFEYJC7J) ```sql WITH current_files AS @@ -1031,7 +1031,7 @@ LIMIT 10 Text dictionaries aren't maybe realistic, so lets restrict to code only via a file extension filter! -[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIHN1bShsaW5lc19hZGRlZCkgLSBzdW0obGluZXNfZGVsZXRlZCkgQVMgbnVtX2xpbmVzLAogICAgdW5pcUV4YWN0KGF1dGhvcikgQVMgbnVtX2F1dGhvcnMsCiAgICBudW1fbGluZXMgLyBudW1fYXV0aG9ycyBBUyBsaW5lc19hdXRob3JfcmF0aW8KRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKV0hFUkUgKHBhdGggSU4gKGN1cnJlbnRfZmlsZXMpKSBBTkQgKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnLCAnc3FsJykpCkdST1VQIEJZIHBhdGgKT1JERVIgQlkgbGluZXNfYXV0aG9yX3JhdGlvIERFU0MKTElNSVQgMTA=) +[play](https://sql.clickhouse.com?query_id=BZHGWUIZMPZZUHS5XRBK2M) ```sql WITH current_files AS @@ -1085,7 +1085,7 @@ LIMIT 10 There is some recency bias in this - newer files have fewer opportunities for commits. What about if we restrict to files at least 1 yr old? 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgbWluKHRpbWUpIEFTIG1pbl9kYXRlLAogICAgcGF0aCwKICAgIHN1bShsaW5lc19hZGRlZCkgLSBzdW0obGluZXNfZGVsZXRlZCkgQVMgbnVtX2xpbmVzLAogICAgdW5pcUV4YWN0KGF1dGhvcikgQVMgbnVtX2F1dGhvcnMsCiAgICBudW1fbGluZXMgLyBudW1fYXV0aG9ycyBBUyBsaW5lc19hdXRob3JfcmF0aW8KRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKV0hFUkUgKHBhdGggSU4gKGN1cnJlbnRfZmlsZXMpKSBBTkQgKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnLCAnc3FsJykpCkdST1VQIEJZIHBhdGgKSEFWSU5HIG1pbl9kYXRlIDw9IChub3coKSAtIHRvSW50ZXJ2YWxZZWFyKDEpKQpPUkRFUiBCWSBsaW5lc19hdXRob3JfcmF0aW8gREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=RMHHZEDHFUCBGRQVQA2732) ```sql WITH current_files AS @@ -1144,7 +1144,7 @@ LIMIT 10 We interpret this as the number of lines added and removed by the day of the week. 
In this case, we focus on the [Functions directory](https://github.com/ClickHouse/ClickHouse/tree/master/src/Functions) -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXlPZldlZWssCiAgICB1bmlxKGNvbW1pdF9oYXNoKSBBUyBjb21taXRzLAogICAgc3VtKGxpbmVzX2FkZGVkKSBBUyBsaW5lc19hZGRlZCwKICAgIHN1bShsaW5lc19kZWxldGVkKSBBUyBsaW5lc19kZWxldGVkCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIHBhdGggTElLRSAnc3JjL0Z1bmN0aW9ucyUnCkdST1VQIEJZIHRvRGF5T2ZXZWVrKHRpbWUpIEFTIGRheU9mV2Vlaw==) +[play](https://sql.clickhouse.com?query_id=PF3KEMYG5CVLJGCFYQEGB1) ```sql SELECT @@ -1171,7 +1171,7 @@ GROUP BY toDayOfWeek(time) AS dayOfWeek And by time of day, -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBob3VyT2ZEYXksCiAgICB1bmlxKGNvbW1pdF9oYXNoKSBBUyBjb21taXRzLAogICAgc3VtKGxpbmVzX2FkZGVkKSBBUyBsaW5lc19hZGRlZCwKICAgIHN1bShsaW5lc19kZWxldGVkKSBBUyBsaW5lc19kZWxldGVkCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIHBhdGggTElLRSAnc3JjL0Z1bmN0aW9ucyUnCkdST1VQIEJZIHRvSG91cih0aW1lKSBBUyBob3VyT2ZEYXk=) +[play](https://sql.clickhouse.com?query_id=Q4VDVKEGHHRBCUJHNCVTF1) ```sql SELECT @@ -1215,7 +1215,7 @@ GROUP BY toHour(time) AS hourOfDay This distribution makes sense given most of our development team is in Amsterdam. 
The `bar` functions helps us visualize these distributions: -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBob3VyT2ZEYXksCiAgICBiYXIoY29tbWl0cywgMCwgNDAwLCA1MCkgQVMgY29tbWl0cywKICAgIGJhcihsaW5lc19hZGRlZCwgMCwgMzAwMDAsIDUwKSBBUyBsaW5lc19hZGRlZCwKICAgIGJhcihsaW5lc19kZWxldGVkLCAwLCAxNTAwMCwgNTApIEFTIGxpbmVzX2RlbGV0ZWQKRlJPTQooCiAgICBTRUxFQ1QKICAgICAgICBob3VyT2ZEYXksCiAgICAgICAgdW5pcShjb21taXRfaGFzaCkgQVMgY29tbWl0cywKICAgICAgICBzdW0obGluZXNfYWRkZWQpIEFTIGxpbmVzX2FkZGVkLAogICAgICAgIHN1bShsaW5lc19kZWxldGVkKSBBUyBsaW5lc19kZWxldGVkCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgV0hFUkUgcGF0aCBMSUtFICdzcmMvRnVuY3Rpb25zJScKICAgIEdST1VQIEJZIHRvSG91cih0aW1lKSBBUyBob3VyT2ZEYXkKKQ==) +[play](https://sql.clickhouse.com?query_id=9AZ8CENV8N91YGW7T6IB68) ```sql SELECT @@ -1269,7 +1269,7 @@ FROM The `sign = -1` indicates a code deletion. We exclude punctuation and the insertion of empty lines. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBwcmV2X2F1dGhvciB8fCAnKGEpJyBhcyBhZGRfYXV0aG9yLAogICAgYXV0aG9yICB8fCAnKGQpJyBhcyBkZWxldGVfYXV0aG9yLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UubGluZV9jaGFuZ2VzCldIRVJFIChzaWduID0gLTEpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcpKSBBTkQgKGxpbmVfdHlwZSBOT1QgSU4gKCdQdW5jdCcsICdFbXB0eScpKSBBTkQgKGF1dGhvciAhPSBwcmV2X2F1dGhvcikgQU5EIChwcmV2X2F1dGhvciAhPSAnJykKR1JPVVAgQlkKICAgIHByZXZfYXV0aG9yLAogICAgYXV0aG9yCk9SREVSIEJZIGMgREVTQwpMSU1JVCAxIEJZIHByZXZfYXV0aG9yCkxJTUlUIDEwMA==) +[play](https://sql.clickhouse.com?query_id=448O8GWAHY3EM6ZZ7AGLAM) ```sql SELECT @@ -1325,7 +1325,7 @@ Alexey clearly likes removing other peoples code. 
Lets exclude him for a more ba If we consider by just number of commits: -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXlfb2Zfd2VlaywKICAgIGF1dGhvciwKICAgIGNvdW50KCkgQVMgYwpGUk9NIGdpdF9jbGlja2hvdXNlLmNvbW1pdHMKR1JPVVAgQlkKICAgIGRheU9mV2Vlayh0aW1lKSBBUyBkYXlfb2Zfd2VlaywKICAgIGF1dGhvcgpPUkRFUiBCWQogICAgZGF5X29mX3dlZWsgQVNDLAogICAgYyBERVNDCkxJTUlUIDEgQlkgZGF5X29mX3dlZWs=) +[play](https://sql.clickhouse.com?query_id=WXPKFJCAHOKYKEVTWNFVCY) ```sql SELECT @@ -1356,7 +1356,7 @@ LIMIT 1 BY day_of_week OK, some possible advantages here to the longest contributor - our founder Alexey. Lets limit our analysis to the last year. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXlfb2Zfd2VlaywKICAgIGF1dGhvciwKICAgIGNvdW50KCkgQVMgYwpGUk9NIGdpdF9jbGlja2hvdXNlLmNvbW1pdHMKV0hFUkUgdGltZSA+IChub3coKSAtIHRvSW50ZXJ2YWxZZWFyKDEpKQpHUk9VUCBCWQogICAgZGF5T2ZXZWVrKHRpbWUpIEFTIGRheV9vZl93ZWVrLAogICAgYXV0aG9yCk9SREVSIEJZCiAgICBkYXlfb2Zfd2VlayBBU0MsCiAgICBjIERFU0MKTElNSVQgMSBCWSBkYXlfb2Zfd2Vlaw==) +[play](https://sql.clickhouse.com?query_id=8YRJGHFTNJAWJ96XCJKKEH) ```sql SELECT @@ -1390,7 +1390,7 @@ This is still a little simple and doesn't reflect people's work. A better metric might be who is the top contributor each day as a fraction of the total work performed in the last year. Note that we treat the deletion and adding code equally. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB0b3BfYXV0aG9yLmRheV9vZl93ZWVrLAogICAgdG9wX2F1dGhvci5hdXRob3IsCiAgICB0b3BfYXV0aG9yLmF1dGhvcl93b3JrIC8gYWxsX3dvcmsudG90YWxfd29yayBBUyB0b3BfYXV0aG9yX3BlcmNlbnQKRlJPTQooCiAgICBTRUxFQ1QKICAgICAgICBkYXlfb2Zfd2VlaywKICAgICAgICBhdXRob3IsCiAgICAgICAgc3VtKGxpbmVzX2FkZGVkKSArIHN1bShsaW5lc19kZWxldGVkKSBBUyBhdXRob3Jfd29yawogICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgIFdIRVJFIHRpbWUgPiAobm93KCkgLSB0b0ludGVydmFsWWVhcigxKSkKICAgIEdST1VQIEJZCiAgICAgICAgYXV0aG9yLAogICAgICAgIGRheU9mV2Vlayh0aW1lKSBBUyBkYXlfb2Zfd2VlawogICAgT1JERVIgQlkKICAgICAgICBkYXlfb2Zfd2VlayBBU0MsCiAgICAgICAgYXV0aG9yX3dvcmsgREVTQwogICAgTElNSVQgMSBCWSBkYXlfb2Zfd2VlawopIEFTIHRvcF9hdXRob3IKSU5ORVIgSk9JTgooCiAgICBTRUxFQ1QKICAgICAgICBkYXlfb2Zfd2VlaywKICAgICAgICBzdW0obGluZXNfYWRkZWQpICsgc3VtKGxpbmVzX2RlbGV0ZWQpIEFTIHRvdGFsX3dvcmsKICAgIEZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCiAgICBXSEVSRSB0aW1lID4gKG5vdygpIC0gdG9JbnRlcnZhbFllYXIoMSkpCiAgICBHUk9VUCBCWSBkYXlPZldlZWsodGltZSkgQVMgZGF5X29mX3dlZWsKKSBBUyBhbGxfd29yayBVU0lORyAoZGF5X29mX3dlZWsp) +[play](https://sql.clickhouse.com?query_id=VQF4KMRDSUEXGS1JFVDJHV) ```sql SELECT @@ -1440,7 +1440,7 @@ INNER JOIN We limit the analysis to the current files. For brevity, we restrict the results to a depth of 2 with 5 files per root folder. Adjust as required. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCBjdXJyZW50X2ZpbGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUIHBhdGgKICAgICAgICBGUk9NCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIG9sZF9wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgbGFzdF90aW1lLAogICAgICAgICAgICAgICAgMiBBUyBjaGFuZ2VfdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBvbGRfcGF0aAogICAgICAgICAgICBVTklPTiBBTEwKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIGFyZ01heChjaGFuZ2VfdHlwZSwgdGltZSkgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgICkKICAgICAgICBHUk9VUCBCWSBwYXRoCiAgICAgICAgSEFWSU5HIChhcmdNYXgoY2hhbmdlX3R5cGUsIGxhc3RfdGltZSkgIT0gMikgQU5EIChOT1QgbWF0Y2gocGF0aCwgJyheZGJtcy8pfChebGlicy8pfChedGVzdHMvdGVzdGZsb3dzLyl8KF5wcm9ncmFtcy9zZXJ2ZXIvc3RvcmUvKScpKQogICAgICAgIE9SREVSIEJZIHBhdGggQVNDCiAgICApClNFTEVDVAogICAgY29uY2F0KHJvb3QsICcvJywgc3ViX2ZvbGRlcikgQVMgZm9sZGVyLAogICAgcm91bmQoYXZnKGRheXNfcHJlc2VudCkpIEFTIGF2Z19hZ2Vfb2ZfZmlsZXMsCiAgICBtaW4oZGF5c19wcmVzZW50KSBBUyBtaW5fYWdlX2ZpbGVzLAogICAgbWF4KGRheXNfcHJlc2VudCkgQVMgbWF4X2FnZV9maWxlcywKICAgIGNvdW50KCkgQVMgYwpGUk9NCigKICAgIFNFTEVDVAogICAgICAgIHBhdGgsCiAgICAgICAgZGF0ZURpZmYoJ2RheScsIG1pbih0aW1lKSwgdG9EYXRlKCcyMDIyLTExLTAzJykpIEFTIGRheXNfcHJlc2VudAogICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgIFdIRVJFIChwYXRoIElOIChjdXJyZW50X2ZpbGVzKSkgQU5EIChmaWxlX2V4dGVuc2lvbiBJTiAoJ2gnLCAnY3BwJywgJ3NxbCcpKQogICAgR1JPVVAgQlkgcGF0aAopCkdST1VQIEJZCiAgICBzcGxpdEJ5Q2hhcignLycsIHBhdGgpWzFdIEFTIHJvb3QsCiAgICBzcGxpdEJ5Q2hhcignLycsIHBhdGgpWzJdIEFTIHN1Yl9mb2xkZXIKT1JERVIgQlkKICAgIHJvb3QgQVNDLAogICAgYyBERVNDCkxJTUlUIDUgQlkgcm9vdAo=) +[play](https://sql.clickhouse.com?query_id=6YWAUQYPZINZDJGBEZBNWG) ```sql WITH current_files AS @@ -1523,7 +1523,7 @@ LIMIT 5 BY root For this question, we need the number of lines written by an author divided by the 
total number of lines they have had removed by another contributor. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBrLAogICAgd3JpdHRlbl9jb2RlLmMsCiAgICByZW1vdmVkX2NvZGUuYywKICAgIHJlbW92ZWRfY29kZS5jIC8gd3JpdHRlbl9jb2RlLmMgQVMgcmVtb3ZlX3JhdGlvCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yIEFTIGssCiAgICAgICAgY291bnQoKSBBUyBjCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgV0hFUkUgKHNpZ24gPSAxKSBBTkQgKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnKSkgQU5EIChsaW5lX3R5cGUgTk9UIElOICgnUHVuY3QnLCAnRW1wdHknKSkKICAgIEdST1VQIEJZIGsKKSBBUyB3cml0dGVuX2NvZGUKSU5ORVIgSk9JTgooCiAgICBTRUxFQ1QKICAgICAgICBwcmV2X2F1dGhvciBBUyBrLAogICAgICAgIGNvdW50KCkgQVMgYwogICAgRlJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKICAgIFdIRVJFIChzaWduID0gLTEpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcpKSBBTkQgKGxpbmVfdHlwZSBOT1QgSU4gKCdQdW5jdCcsICdFbXB0eScpKSBBTkQgKGF1dGhvciAhPSBwcmV2X2F1dGhvcikKICAgIEdST1VQIEJZIGsKKSBBUyByZW1vdmVkX2NvZGUgVVNJTkcgKGspCldIRVJFIHdyaXR0ZW5fY29kZS5jID4gMTAwMApPUkRFUiBCWSByZW1vdmVfcmF0aW8gREVTQwpMSU1JVCAxMAo=) +[play](https://sql.clickhouse.com?query_id=T4DTWTB36WFSEYAZLMGRNF) ```sql SELECT @@ -1627,7 +1627,7 @@ This doesn't capture the notion of a "re-write" however, where a large portion o The query is limited to the current files only. We list all file changes by grouping by `path` and `commit_hash`, returning the number of lines added and removed. Using a window function, we estimate the file's total size at any moment in time by performing a cumulative sum and estimating the impact of any change on file size as `lines added - lines removed`. Using this statistic, we can calculate the percentage of the file that has been added or removed for each change. Finally, we count the number of file changes that constitute a rewrite per file i.e. `(percent_add >= 0.5) AND (percent_delete >= 0.5) AND current_size > 50`. Note we require files to be more than 50 lines to avoid early contributions to a file being counted as a rewrite. 
This also avoids a bias to very small files, which may be more likely to be rewritten. -[play](https://play.clickhouse.com/play?user=play#V0lUSAogICAgY3VycmVudF9maWxlcyBBUwogICAgKAogICAgICAgIFNFTEVDVCBwYXRoCiAgICAgICAgRlJPTQogICAgICAgICgKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBvbGRfcGF0aCBBUyBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIDIgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgb2xkX3BhdGgKICAgICAgICAgICAgVU5JT04gQUxMCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIG1heCh0aW1lKSBBUyBsYXN0X3RpbWUsCiAgICAgICAgICAgICAgICBhcmdNYXgoY2hhbmdlX3R5cGUsIHRpbWUpIEFTIGNoYW5nZV90eXBlCiAgICAgICAgICAgIEZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCiAgICAgICAgICAgIEdST1VQIEJZIHBhdGgKICAgICAgICApCiAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgIEhBVklORyAoYXJnTWF4KGNoYW5nZV90eXBlLCBsYXN0X3RpbWUpICE9IDIpIEFORCAoTk9UIG1hdGNoKHBhdGgsICcoXmRibXMvKXwoXmxpYnMvKXwoXnRlc3RzL3Rlc3RmbG93cy8pfChecHJvZ3JhbXMvc2VydmVyL3N0b3JlLyknKSkKICAgICAgICBPUkRFUiBCWSBwYXRoIEFTQwogICAgKSwKICAgIGNoYW5nZXMgQVMKICAgICgKICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgbWF4KHRpbWUpIEFTIG1heF90aW1lLAogICAgICAgICAgICBjb21taXRfaGFzaCwKICAgICAgICAgICAgYW55KGxpbmVzX2FkZGVkKSBBUyBudW1fYWRkZWQsCiAgICAgICAgICAgIGFueShsaW5lc19kZWxldGVkKSBBUyBudW1fZGVsZXRlZCwKICAgICAgICAgICAgYW55KGNoYW5nZV90eXBlKSBBUyB0eXBlCiAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICBXSEVSRSAoY2hhbmdlX3R5cGUgSU4gKCdBZGQnLCAnTW9kaWZ5JykpIEFORCAocGF0aCBJTiAoY3VycmVudF9maWxlcykpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKICAgICAgICBHUk9VUCBCWQogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBjb21taXRfaGFzaAogICAgICAgIE9SREVSIEJZCiAgICAgICAgICAgIHBhdGggQVNDLAogICAgICAgICAgICBtYXhfdGltZSBBU0MKICAgICksCiAgICByZXdyaXRlcyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBjb21taXRfaGFzaCwKICAgICAgICAgICAgbWF4X3RpbWUsCiAgICAgICAgICAgIHR5cGUsCiAgICAgICAgICAgIG51bV9hZGRlZCwKICAgIC
AgICAgICAgbnVtX2RlbGV0ZWQsCiAgICAgICAgICAgIHN1bShudW1fYWRkZWQgLSBudW1fZGVsZXRlZCkgT1ZFUiAoUEFSVElUSU9OIEJZIHBhdGggT1JERVIgQlkgbWF4X3RpbWUgQVNDKSBBUyBjdXJyZW50X3NpemUsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9hZGRlZCAvIGN1cnJlbnRfc2l6ZSwgMCkgQVMgcGVyY2VudF9hZGQsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9kZWxldGVkIC8gY3VycmVudF9zaXplLCAwKSBBUyBwZXJjZW50X2RlbGV0ZQogICAgICAgIEZST00gY2hhbmdlcwogICAgKQpTRUxFQ1QKICAgIHBhdGgsCiAgICBjb3VudCgpIEFTIG51bV9yZXdyaXRlcwpGUk9NIHJld3JpdGVzCldIRVJFICh0eXBlID0gJ01vZGlmeScpIEFORCAocGVyY2VudF9hZGQgPj0gMC41KSBBTkQgKHBlcmNlbnRfZGVsZXRlID49IDAuNSkgQU5EIChjdXJyZW50X3NpemUgPiA1MCkKR1JPVVAgQlkgcGF0aApPUkRFUiBCWSBudW1fcmV3cml0ZXMgREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=5PL1QLNSH6QQTR8H9HINNP) ```sql WITH @@ -1719,7 +1719,7 @@ We query for lines added, joining this with the lines removed - filtering to cas Finally, we aggregate across this dataset to compute the average number of days lines stay in the repository by the day of the week. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBkYXlfb2Zfd2Vla19hZGRlZCwKICAgIGNvdW50KCkgQVMgbnVtLAogICAgYXZnKGRheXNfcHJlc2VudCkgQVMgYXZnX2RheXNfcHJlc2VudApGUk9NCigKICAgIFNFTEVDVAogICAgICAgIGFkZGVkX2NvZGUubGluZSwKICAgICAgICBhZGRlZF9jb2RlLnRpbWUgQVMgYWRkZWRfZGF5LAogICAgICAgIGRhdGVEaWZmKCdkYXknLCBhZGRlZF9jb2RlLnRpbWUsIHJlbW92ZWRfY29kZS50aW1lKSBBUyBkYXlzX3ByZXNlbnQKICAgIEZST00KICAgICgKICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgbGluZSwKICAgICAgICAgICAgbWF4KHRpbWUpIEFTIHRpbWUKICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgICAgIFdIRVJFIChzaWduID0gMSkgQU5EIChsaW5lX3R5cGUgTk9UIElOICgnUHVuY3QnLCAnRW1wdHknKSkKICAgICAgICBHUk9VUCBCWQogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBsaW5lCiAgICApIEFTIGFkZGVkX2NvZGUKICAgIElOTkVSIEpPSU4KICAgICgKICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgbGluZSwKICAgICAgICAgICAgbWF4KHRpbWUpIEFTIHRpbWUKICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgICAgIFdIRVJFIChzaWduID0gLTEpIEFORCAobGluZV90eXBlIE5PVCBJTiAoJ1B1bmN0JywgJ0VtcHR5JykpCiAgICAgICAgR1JPVVAgQlkKICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgbGluZQogICAgKSBBUyByZW1vdmVkX2NvZGUgVVNJTkcgKHBhdGgsIGxpbmUpCiAgICBXSEVSRSByZW1vdmVkX2NvZGUudGltZSA+IGFkZGVkX2NvZGUudGltZQopCkdST1VQIEJZIGRheU9mV2VlayhhZGRlZF9kYXkpIEFTIGRheV9vZl93ZWVrX2FkZGVk) +[play](https://sql.clickhouse.com?query_id=GVF23LEZTNZI22BT8LZBBE) ```sql SELECT @@ -1778,7 +1778,7 @@ GROUP BY dayOfWeek(added_day) AS day_of_week_added This query uses the same principle as [What weekday does the code have the highest chance to stay in the repository](#what-weekday-does-the-code-have-the-highest-chance-to-stay-in-the-repository) - by aiming to uniquely identify a line of code using the path and line contents. This allows us to identify the time between when a line was added and removed. We filter to current files and code only, however, and average the time for each file across lines. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSAogICAgY3VycmVudF9maWxlcyBBUwogICAgKAogICAgICAgIFNFTEVDVCBwYXRoCiAgICAgICAgRlJPTQogICAgICAgICgKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBvbGRfcGF0aCBBUyBwYXRoLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIGxhc3RfdGltZSwKICAgICAgICAgICAgICAgIDIgQVMgY2hhbmdlX3R5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgR1JPVVAgQlkgb2xkX3BhdGgKICAgICAgICAgICAgVU5JT04gQUxMCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIG1heCh0aW1lKSBBUyBsYXN0X3RpbWUsCiAgICAgICAgICAgICAgICBhcmdNYXgoY2hhbmdlX3R5cGUsIHRpbWUpIEFTIGNoYW5nZV90eXBlCiAgICAgICAgICAgIEZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCiAgICAgICAgICAgIEdST1VQIEJZIHBhdGgKICAgICAgICApCiAgICAgICAgR1JPVVAgQlkgcGF0aAogICAgICAgIEhBVklORyAoYXJnTWF4KGNoYW5nZV90eXBlLCBsYXN0X3RpbWUpICE9IDIpIEFORCAoTk9UIG1hdGNoKHBhdGgsICcoXmRibXMvKXwoXmxpYnMvKXwoXnRlc3RzL3Rlc3RmbG93cy8pfChecHJvZ3JhbXMvc2VydmVyL3N0b3JlLyknKSkKICAgICAgICBPUkRFUiBCWSBwYXRoIEFTQwogICAgKSwKICAgIGxpbmVzX3JlbW92ZWQgQVMKICAgICgKICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgYWRkZWRfY29kZS5wYXRoIEFTIHBhdGgsCiAgICAgICAgICAgIGFkZGVkX2NvZGUubGluZSwKICAgICAgICAgICAgYWRkZWRfY29kZS50aW1lIEFTIGFkZGVkX2RheSwKICAgICAgICAgICAgZGF0ZURpZmYoJ2RheScsIGFkZGVkX2NvZGUudGltZSwgcmVtb3ZlZF9jb2RlLnRpbWUpIEFTIGRheXNfcHJlc2VudAogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIGxpbmUsCiAgICAgICAgICAgICAgICBtYXgodGltZSkgQVMgdGltZSwKICAgICAgICAgICAgICAgIGFueShmaWxlX2V4dGVuc2lvbikgQVMgZmlsZV9leHRlbnNpb24KICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKICAgICAgICAgICAgV0hFUkUgKHNpZ24gPSAxKSBBTkQgKGxpbmVfdHlwZSBOT1QgSU4gKCdQdW5jdCcsICdFbXB0eScpKQogICAgICAgICAgICBHUk9VUCBCWQogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIGxpbmUKICAgICAgICApIEFTIGFkZGVkX2NvZGUKICAgICAgICBJTk5FUiBKT0lOCiAgICAgICAgKAogICAgICAgICAgICBTRUxFQ1QKICAgICAgICAgICAgICAgIHBhdGgsCiAgICAgICAgICAgICAgICBsaW5lLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIHRpbWUKICAgICAgICAgICAgR
lJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKICAgICAgICAgICAgV0hFUkUgKHNpZ24gPSAtMSkgQU5EIChsaW5lX3R5cGUgTk9UIElOICgnUHVuY3QnLCAnRW1wdHknKSkKICAgICAgICAgICAgR1JPVVAgQlkKICAgICAgICAgICAgICAgIHBhdGgsCiAgICAgICAgICAgICAgICBsaW5lCiAgICAgICAgKSBBUyByZW1vdmVkX2NvZGUgVVNJTkcgKHBhdGgsIGxpbmUpCiAgICAgICAgV0hFUkUgKHJlbW92ZWRfY29kZS50aW1lID4gYWRkZWRfY29kZS50aW1lKSBBTkQgKHBhdGggSU4gKGN1cnJlbnRfZmlsZXMpKSBBTkQgKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnLCAnc3FsJykpCiAgICApClNFTEVDVAogICAgcGF0aCwKICAgIGF2ZyhkYXlzX3ByZXNlbnQpIEFTIGF2Z19jb2RlX2FnZQpGUk9NIGxpbmVzX3JlbW92ZWQKR1JPVVAgQlkgcGF0aApPUkRFUiBCWSBhdmdfY29kZV9hZ2UgREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=3CYYT7HEHWRFHVCM9JCKSU) ```sql WITH @@ -1869,7 +1869,7 @@ There are a few ways we can address this question. Focusing on the code to test Note we limit to users with more than 20 changes to focus on regular committers and avoid a bias to one-off contributions. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBjb3VudElmKChmaWxlX2V4dGVuc2lvbiBJTiAoJ2gnLCAnY3BwJywgJ3NxbCcsICdzaCcsICdweScsICdleHBlY3QnKSkgQU5EIChwYXRoIExJS0UgJyV0ZXN0cyUnKSkgQVMgdGVzdCwKICAgIGNvdW50SWYoKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnLCAnc3FsJykpIEFORCAoTk9UIChwYXRoIExJS0UgJyV0ZXN0cyUnKSkpIEFTIGNvZGUsCiAgICBjb2RlIC8gKGNvZGUgKyB0ZXN0KSBBUyByYXRpb19jb2RlCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCkdST1VQIEJZIGF1dGhvcgpIQVZJTkcgY29kZSA+IDIwCk9SREVSIEJZIGNvZGUgREVTQwpMSU1JVCAyMA==) +[play](https://sql.clickhouse.com?query_id=JGKZSEQDPDTDKZXD3ZCGLE) ```sql SELECT @@ -1911,7 +1911,7 @@ LIMIT 20 We can plot this distribution as a histogram. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSCAoCiAgICAgICAgU0VMRUNUIGhpc3RvZ3JhbSgxMCkocmF0aW9fY29kZSkgQVMgaGlzdAogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgYXV0aG9yLAogICAgICAgICAgICAgICAgY291bnRJZigoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnLCAnc2gnLCAncHknLCAnZXhwZWN0JykpIEFORCAocGF0aCBMSUtFICcldGVzdHMlJykpIEFTIHRlc3QsCiAgICAgICAgICAgICAgICBjb3VudElmKChmaWxlX2V4dGVuc2lvbiBJTiAoJ2gnLCAnY3BwJywgJ3NxbCcpKSBBTkQgKE5PVCAocGF0aCBMSUtFICcldGVzdHMlJykpKSBBUyBjb2RlLAogICAgICAgICAgICAgICAgY29kZSAvIChjb2RlICsgdGVzdCkgQVMgcmF0aW9fY29kZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogICAgICAgICAgICBHUk9VUCBCWSBhdXRob3IKICAgICAgICAgICAgSEFWSU5HIGNvZGUgPiAyMAogICAgICAgICAgICBPUkRFUiBCWSBjb2RlIERFU0MKICAgICAgICAgICAgTElNSVQgMjAKICAgICAgICApCiAgICApIEFTIGhpc3QKU0VMRUNUCiAgICBhcnJheUpvaW4oaGlzdCkuMSBBUyBsb3dlciwKICAgIGFycmF5Sm9pbihoaXN0KS4yIEFTIHVwcGVyLAogICAgYmFyKGFycmF5Sm9pbihoaXN0KS4zLCAwLCAxMDAsIDUwMCkgQVMgYmFy) +[play](https://sql.clickhouse.com?query_id=S5AJIIRGSUAY1JXEVHQDAK) ```sql WITH ( @@ -1954,7 +1954,7 @@ Most contributors write more code than tests, as you'd expect. What about who adds the most comments when contributing code? 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBhdmcocmF0aW9fY29tbWVudHMpIEFTIGF2Z19yYXRpb19jb21tZW50cywKICAgIHN1bShjb2RlKSBBUyBjb2RlCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIGNvdW50SWYobGluZV90eXBlID0gJ0NvbW1lbnQnKSBBUyBjb21tZW50cywKICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICBpZihjb21tZW50cyA+IDAsIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSksIDApIEFTIHJhdGlvX2NvbW1lbnRzCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgR1JPVVAgQlkKICAgICAgICBhdXRob3IsCiAgICAgICAgY29tbWl0X2hhc2gKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgY29kZSBERVNDCkxJTUlUIDEwCg==) +[play](https://sql.clickhouse.com?query_id=EXPHDIURBTOXXOK1TGNNYD) ```sql SELECT @@ -2038,7 +2038,7 @@ To compute this, we first work out each author's comments ratio over time - simi After calculating the average by-week offset across all authors, we sample these results by selecting every 10th week. -[play](https://play.clickhouse.com/play?user=play#V0lUSCBhdXRob3JfcmF0aW9zX2J5X29mZnNldCBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBhdXRob3IsCiAgICAgICAgICAgIGRhdGVEaWZmKCd3ZWVrJywgc3RhcnRfZGF0ZXMuc3RhcnRfZGF0ZSwgY29udHJpYnV0aW9ucy53ZWVrKSBBUyB3ZWVrX29mZnNldCwKICAgICAgICAgICAgcmF0aW9fY29kZQogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgYXV0aG9yLAogICAgICAgICAgICAgICAgdG9TdGFydE9mV2VlayhtaW4odGltZSkpIEFTIHN0YXJ0X2RhdGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKICAgICAgICAgICAgV0hFUkUgZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKQogICAgICAgICAgICBHUk9VUCBCWSBhdXRob3IgQVMgc3RhcnRfZGF0ZXMKICAgICAgICApIEFTIHN0YXJ0X2RhdGVzCiAgICAgICAgSU5ORVIgSk9JTgogICAgICAgICgKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBhdXRob3IsCiAgICAgICAgICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICAgICAgICAgIGNvdW50SWYoKGxpbmVfdHlwZSA9ICdDb21tZW50JykgT1IgKGxpbmVfdHlwZSA9ICdQdW5jdCcpKSBBUyBjb21tZW50cywKICAgICAgICAgICAgICAgIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSkgQVMgcmF0aW9fY29k
ZSwKICAgICAgICAgICAgICAgIHRvU3RhcnRPZldlZWsodGltZSkgQVMgd2VlawogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgICAgICAgICBXSEVSRSAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkgQU5EIChzaWduID0gMSkKICAgICAgICAgICAgR1JPVVAgQlkKICAgICAgICAgICAgICAgIHRpbWUsCiAgICAgICAgICAgICAgICBhdXRob3IKICAgICAgICAgICAgSEFWSU5HIGNvZGUgPiAyMAogICAgICAgICAgICBPUkRFUiBCWQogICAgICAgICAgICAgICAgYXV0aG9yIEFTQywKICAgICAgICAgICAgICAgIHRpbWUgQVNDCiAgICAgICAgKSBBUyBjb250cmlidXRpb25zIFVTSU5HIChhdXRob3IpCiAgICApClNFTEVDVAogICAgd2Vla19vZmZzZXQsCiAgICBhdmcocmF0aW9fY29kZSkgQVMgYXZnX2NvZGVfcmF0aW8KRlJPTSBhdXRob3JfcmF0aW9zX2J5X29mZnNldApHUk9VUCBCWSB3ZWVrX29mZnNldApIQVZJTkcgKHdlZWtfb2Zmc2V0ICUgMTApID0gMApPUkRFUiBCWSB3ZWVrX29mZnNldCBBU0MKTElNSVQgMjAK) +[play](https://sql.clickhouse.com?query_id=SBHEWR8XC4PRHY13HPPKCN) ```sql WITH author_ratios_by_offset AS @@ -2116,7 +2116,7 @@ Encouragingly, our comment % is pretty constant and doesn't degrade the longer a We can use the same principle as [List files that were rewritten most number of time or by most of authors](#list-files-that-were-rewritten-most-number-of-time-or-by-most-of-authors) to identify rewrites but consider all files. A window function is used to compute the time between rewrites for each file. From this, we can calculate an average and median across all files. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSAogICAgY2hhbmdlcyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBjb21taXRfaGFzaCwKICAgICAgICAgICAgbWF4X3RpbWUsCiAgICAgICAgICAgIHR5cGUsCiAgICAgICAgICAgIG51bV9hZGRlZCwKICAgICAgICAgICAgbnVtX2RlbGV0ZWQsCiAgICAgICAgICAgIHN1bShudW1fYWRkZWQgLSBudW1fZGVsZXRlZCkgT1ZFUiAoUEFSVElUSU9OIEJZIHBhdGggT1JERVIgQlkgbWF4X3RpbWUgQVNDKSBBUyBjdXJyZW50X3NpemUsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9hZGRlZCAvIGN1cnJlbnRfc2l6ZSwgMCkgQVMgcGVyY2VudF9hZGQsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9kZWxldGVkIC8gY3VycmVudF9zaXplLCAwKSBBUyBwZXJjZW50X2RlbGV0ZQogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIG1heCh0aW1lKSBBUyBtYXhfdGltZSwKICAgICAgICAgICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgICAgICAgICAgYW55KGxpbmVzX2FkZGVkKSBBUyBudW1fYWRkZWQsCiAgICAgICAgICAgICAgICBhbnkobGluZXNfZGVsZXRlZCkgQVMgbnVtX2RlbGV0ZWQsCiAgICAgICAgICAgICAgICBhbnkoY2hhbmdlX3R5cGUpIEFTIHR5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKICAgICAgICAgICAgV0hFUkUgKGNoYW5nZV90eXBlIElOICgnQWRkJywgJ01vZGlmeScpKSBBTkQgKGZpbGVfZXh0ZW5zaW9uIElOICgnaCcsICdjcHAnLCAnc3FsJykpCiAgICAgICAgICAgIEdST1VQIEJZCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgY29tbWl0X2hhc2gKICAgICAgICAgICAgT1JERVIgQlkKICAgICAgICAgICAgICAgIHBhdGggQVNDLAogICAgICAgICAgICAgICAgbWF4X3RpbWUgQVNDCiAgICAgICAgKQogICAgKSwKICAgIHJld3JpdGVzIEFTCiAgICAoCiAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICosCiAgICAgICAgICAgIGFueShtYXhfdGltZSkgT1ZFUiAoUEFSVElUSU9OIEJZIHBhdGggT1JERVIgQlkgbWF4X3RpbWUgQVNDIFJPV1MgQkVUV0VFTiAxIFBSRUNFRElORyBBTkQgQ1VSUkVOVCBST1cpIEFTIHByZXZpb3VzX3Jld3JpdGUsCiAgICAgICAgICAgIGRhdGVEaWZmKCdkYXknLCBwcmV2aW91c19yZXdyaXRlLCBtYXhfdGltZSkgQVMgcmV3cml0ZV9kYXlzCiAgICAgICAgRlJPTSBjaGFuZ2VzCiAgICAgICAgV0hFUkUgKHR5cGUgPSAnTW9kaWZ5JykgQU5EIChwZXJjZW50X2FkZCA+PSAwLjUpIEFORCAocGVyY2VudF9kZWxldGUgPj0gMC41KSBBTkQgKGN1cnJlbnRfc2l6ZSA+IDUwKQogICAgKQpTRUxFQ1QKICAgIGF2Z0lmKHJld3JpdGVfZGF5cywgcmV3cml0ZV9kYXlzID4gMCkgQVMgY
XZnX3Jld3JpdGVfdGltZSwKICAgIHF1YW50aWxlc1RpbWluZ0lmKDAuNSkocmV3cml0ZV9kYXlzLCByZXdyaXRlX2RheXMgPiAwKSBBUyBoYWxmX2xpZmUKRlJPTSByZXdyaXRlcw==) +[play](https://sql.clickhouse.com?query_id=WSHUEPJP9TNJUH7QITWWOR) ```sql WITH @@ -2176,7 +2176,7 @@ FROM rewrites Similar to [What is the average time before code will be rewritten and the median (half-life of code decay)?](#what-is-the-average-time-before-code-will-be-rewritten-and-the-median-half-life-of-code-decay) and [List files that were rewritten most number of time or by most of authors](#list-files-that-were-rewritten-most-number-of-time-or-by-most-of-authors), except we aggregate by day of week. Adjust as required e.g. month of year. -[play](https://play.clickhouse.com/play?user=play#V0lUSAogICAgY2hhbmdlcyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBjb21taXRfaGFzaCwKICAgICAgICAgICAgbWF4X3RpbWUsCiAgICAgICAgICAgIHR5cGUsCiAgICAgICAgICAgIG51bV9hZGRlZCwKICAgICAgICAgICAgbnVtX2RlbGV0ZWQsCiAgICAgICAgICAgIHN1bShudW1fYWRkZWQgLSBudW1fZGVsZXRlZCkgT1ZFUiAoUEFSVElUSU9OIEJZIHBhdGggT1JERVIgQlkgbWF4X3RpbWUgQVNDKSBBUyBjdXJyZW50X3NpemUsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9hZGRlZCAvIGN1cnJlbnRfc2l6ZSwgMCkgQVMgcGVyY2VudF9hZGQsCiAgICAgICAgICAgIGlmKGN1cnJlbnRfc2l6ZSA+IDAsIG51bV9kZWxldGVkIC8gY3VycmVudF9zaXplLCAwKSBBUyBwZXJjZW50X2RlbGV0ZQogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIG1heCh0aW1lKSBBUyBtYXhfdGltZSwKICAgICAgICAgICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgICAgICAgICAgYW55KGZpbGVfbGluZXNfYWRkZWQpIEFTIG51bV9hZGRlZCwKICAgICAgICAgICAgICAgIGFueShmaWxlX2xpbmVzX2RlbGV0ZWQpIEFTIG51bV9kZWxldGVkLAogICAgICAgICAgICAgICAgYW55KGZpbGVfY2hhbmdlX3R5cGUpIEFTIHR5cGUKICAgICAgICAgICAgRlJPTSBnaXRfY2xpY2tob3VzZS5saW5lX2NoYW5nZXMKICAgICAgICAgICAgV0hFUkUgKGZpbGVfY2hhbmdlX3R5cGUgSU4gKCdBZGQnLCAnTW9kaWZ5JykpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKICAgICAgICAgICAgR1JPVVAgQlkKICAgICAgICAgICAgICAgIHBhdGgsCiAgICAgICAgICAgICAgICBjb21taXRfaGFzaAog
ICAgICAgICAgICBPUkRFUiBCWQogICAgICAgICAgICAgICAgcGF0aCBBU0MsCiAgICAgICAgICAgICAgICBtYXhfdGltZSBBU0MKICAgICAgICApCiAgICApLAogICAgcmV3cml0ZXMgQVMKICAgICgKICAgICAgICBTRUxFQ1QgYW55KG1heF90aW1lKSBPVkVSIChQQVJUSVRJT04gQlkgcGF0aCBPUkRFUiBCWSBtYXhfdGltZSBBU0MgUk9XUyBCRVRXRUVOIDEgUFJFQ0VESU5HIEFORCBDVVJSRU5UIFJPVykgQVMgcHJldmlvdXNfcmV3cml0ZQogICAgICAgIEZST00gY2hhbmdlcwogICAgICAgIFdIRVJFICh0eXBlID0gJ01vZGlmeScpIEFORCAocGVyY2VudF9hZGQgPj0gMC41KSBBTkQgKHBlcmNlbnRfZGVsZXRlID49IDAuNSkgQU5EIChjdXJyZW50X3NpemUgPiA1MCkKICAgICkKU0VMRUNUCiAgICBkYXlPZldlZWsocHJldmlvdXNfcmV3cml0ZSkgQVMgZGF5T2ZXZWVrLAogICAgY291bnQoKSBBUyBudW1fcmVfd3JpdGVzCkZST00gcmV3cml0ZXMKR1JPVVAgQlkgZGF5T2ZXZWVr) +[play](https://sql.clickhouse.com?query_id=8PQNWEWHAJTGN6FTX59KH2) ```sql WITH @@ -2240,7 +2240,7 @@ GROUP BY dayOfWeek We define "sticky" as how long does an author's code stay before its rewritten. Similar to the previous question [What is the average time before code will be rewritten and the median (half-life of code decay)?](#what-is-the-average-time-before-code-will-be-rewritten-and-the-median-half-life-of-code-decay) - using the same metric for rewrites i.e. 50% additions and 50% deletions to the file. We compute the average rewrite time per author and only consider contributors with more than two files. 
-[play](https://play.clickhouse.com/play?user=play#V0lUSAogICAgY2hhbmdlcyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBwYXRoLAogICAgICAgICAgICBhdXRob3IsCiAgICAgICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgICAgICBtYXhfdGltZSwKICAgICAgICAgICAgdHlwZSwKICAgICAgICAgICAgbnVtX2FkZGVkLAogICAgICAgICAgICBudW1fZGVsZXRlZCwKICAgICAgICAgICAgc3VtKG51bV9hZGRlZCAtIG51bV9kZWxldGVkKSBPVkVSIChQQVJUSVRJT04gQlkgcGF0aCBPUkRFUiBCWSBtYXhfdGltZSBBU0MpIEFTIGN1cnJlbnRfc2l6ZSwKICAgICAgICAgICAgaWYoY3VycmVudF9zaXplID4gMCwgbnVtX2FkZGVkIC8gY3VycmVudF9zaXplLCAwKSBBUyBwZXJjZW50X2FkZCwKICAgICAgICAgICAgaWYoY3VycmVudF9zaXplID4gMCwgbnVtX2RlbGV0ZWQgLyBjdXJyZW50X3NpemUsIDApIEFTIHBlcmNlbnRfZGVsZXRlCiAgICAgICAgRlJPTQogICAgICAgICgKICAgICAgICAgICAgU0VMRUNUCiAgICAgICAgICAgICAgICBwYXRoLAogICAgICAgICAgICAgICAgYW55KGF1dGhvcikgQVMgYXV0aG9yLAogICAgICAgICAgICAgICAgbWF4KHRpbWUpIEFTIG1heF90aW1lLAogICAgICAgICAgICAgICAgY29tbWl0X2hhc2gsCiAgICAgICAgICAgICAgICBhbnkoZmlsZV9saW5lc19hZGRlZCkgQVMgbnVtX2FkZGVkLAogICAgICAgICAgICAgICAgYW55KGZpbGVfbGluZXNfZGVsZXRlZCkgQVMgbnVtX2RlbGV0ZWQsCiAgICAgICAgICAgICAgICBhbnkoZmlsZV9jaGFuZ2VfdHlwZSkgQVMgdHlwZQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgICAgICAgICBXSEVSRSAoZmlsZV9jaGFuZ2VfdHlwZSBJTiAoJ0FkZCcsICdNb2RpZnknKSkgQU5EIChmaWxlX2V4dGVuc2lvbiBJTiAoJ2gnLCAnY3BwJywgJ3NxbCcpKQogICAgICAgICAgICBHUk9VUCBCWQogICAgICAgICAgICAgICAgcGF0aCwKICAgICAgICAgICAgICAgIGNvbW1pdF9oYXNoCiAgICAgICAgICAgIE9SREVSIEJZCiAgICAgICAgICAgICAgICBwYXRoIEFTQywKICAgICAgICAgICAgICAgIG1heF90aW1lIEFTQwogICAgICAgICkKICAgICksCiAgICByZXdyaXRlcyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICAqLAogICAgICAgICAgICBhbnkobWF4X3RpbWUpIE9WRVIgKFBBUlRJVElPTiBCWSBwYXRoIE9SREVSIEJZIG1heF90aW1lIEFTQyBST1dTIEJFVFdFRU4gMSBQUkVDRURJTkcgQU5EIENVUlJFTlQgUk9XKSBBUyBwcmV2aW91c19yZXdyaXRlLAogICAgICAgICAgICBkYXRlRGlmZignZGF5JywgcHJldmlvdXNfcmV3cml0ZSwgbWF4X3RpbWUpIEFTIHJld3JpdGVfZGF5cywKICAgICAgICAgICAgYW55KGF1dGhvcikgT1ZFUiAoUEFSVElUSU9OIEJZIHBhdGggT1JERVIgQlkgbWF4X3RpbWUgQVNDIFJPV1MgQkVUV0VFTiAxIFBSRUNFRElORyBBTkQgQ1VSUkVOVCBST
1cpIEFTIHByZXZfYXV0aG9yCiAgICAgICAgRlJPTSBjaGFuZ2VzCiAgICAgICAgV0hFUkUgKHR5cGUgPSAnTW9kaWZ5JykgQU5EIChwZXJjZW50X2FkZCA+PSAwLjUpIEFORCAocGVyY2VudF9kZWxldGUgPj0gMC41KSBBTkQgKGN1cnJlbnRfc2l6ZSA+IDUwKQogICAgKQpTRUxFQ1QKICAgIHByZXZfYXV0aG9yLAogICAgYXZnKHJld3JpdGVfZGF5cykgQVMgYywKICAgIHVuaXEocGF0aCkgQVMgbnVtX2ZpbGVzCkZST00gcmV3cml0ZXMKR1JPVVAgQlkgcHJldl9hdXRob3IKSEFWSU5HIG51bV9maWxlcyA+IDIKT1JERVIgQlkgYyBERVNDCkxJTUlUIDEwCg==) +[play](https://sql.clickhouse.com?query_id=BKHLVVWN5SET1VTIFQ8JVK) ```sql WITH @@ -2319,7 +2319,7 @@ This query first requires us to calculate the days when an author has committed. Our subsequent array functions compute each author's longest sequence of consecutive ones. First, the `groupArray` function is used to collate all `consecutive_day` values for an author. This array of 1s and 0s, is then split on 0 values into subarrays. Finally, we calculate the longest subarray. -[play](https://play.clickhouse.com/play?user=play#V0lUSCBjb21taXRfZGF5cyBBUwogICAgKAogICAgICAgIFNFTEVDVAogICAgICAgICAgICBhdXRob3IsCiAgICAgICAgICAgIGRheSwKICAgICAgICAgICAgYW55KGRheSkgT1ZFUiAoUEFSVElUSU9OIEJZIGF1dGhvciBPUkRFUiBCWSBkYXkgQVNDIFJPV1MgQkVUV0VFTiAxIFBSRUNFRElORyBBTkQgQ1VSUkVOVCBST1cpIEFTIHByZXZpb3VzX2NvbW1pdCwKICAgICAgICAgICAgZGF0ZURpZmYoJ2RheScsIHByZXZpb3VzX2NvbW1pdCwgZGF5KSBBUyBkYXlzX3NpbmNlX2xhc3QsCiAgICAgICAgICAgIGlmKGRheXNfc2luY2VfbGFzdCA9IDEsIDEsIDApIEFTIGNvbnNlY3V0aXZlX2RheQogICAgICAgIEZST00KICAgICAgICAoCiAgICAgICAgICAgIFNFTEVDVAogICAgICAgICAgICAgICAgYXV0aG9yLAogICAgICAgICAgICAgICAgdG9TdGFydE9mRGF5KHRpbWUpIEFTIGRheQogICAgICAgICAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmNvbW1pdHMKICAgICAgICAgICAgR1JPVVAgQlkKICAgICAgICAgICAgICAgIGF1dGhvciwKICAgICAgICAgICAgICAgIGRheQogICAgICAgICAgICBPUkRFUiBCWQogICAgICAgICAgICAgICAgYXV0aG9yIEFTQywKICAgICAgICAgICAgICAgIGRheSBBU0MKICAgICAgICApCiAgICApClNFTEVDVAogICAgYXV0aG9yLAogICAgYXJyYXlNYXgoYXJyYXlNYXAoeCAtPiBsZW5ndGgoeCksIGFycmF5U3BsaXQoeCAtPiAoeCA9IDApLCBncm91cEFycmF5KGNvbnNlY3V0aXZlX2RheSkpKSkgQVMgbWF4X2NvbnNlY3V0aXZlX2RheXMKRlJPTSBjb21
taXRfZGF5cwpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgbWF4X2NvbnNlY3V0aXZlX2RheXMgREVTQwpMSU1JVCAxMA==) +[play](https://sql.clickhouse.com?query_id=S3E64UYCAMDAYJRSXINVFR) ```sql WITH commit_days AS @@ -2372,7 +2372,7 @@ LIMIT 10 Files can be renamed. When this occurs, we get a rename event, where the `path` column is set to the new path of the file and the `old_path` represents the previous location e.g. -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB0aW1lLAogICAgcGF0aCwKICAgIG9sZF9wYXRoLAogICAgY29tbWl0X2hhc2gsCiAgICBjb21taXRfbWVzc2FnZQpGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwpXSEVSRSAocGF0aCA9ICdzcmMvU3RvcmFnZXMvU3RvcmFnZVJlcGxpY2F0ZWRNZXJnZVRyZWUuY3BwJykgQU5EIChjaGFuZ2VfdHlwZSA9ICdSZW5hbWUnKQ==) +[play](https://sql.clickhouse.com?query_id=AKTW3Z8JZAPQ4H9BH2ZFRX) ```sql SELECT @@ -2410,8 +2410,6 @@ By calling `file_path_history('src/Storages/StorageReplicatedMergeTree.cpp')` we For example, -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUIGZpbGVfcGF0aF9oaXN0b3J5KCdzcmMvU3RvcmFnZXMvU3RvcmFnZVJlcGxpY2F0ZWRNZXJnZVRyZWUuY3BwJykgQVMgcGF0aHMK) - ```sql SELECT file_path_history('src/Storages/StorageReplicatedMergeTree.cpp') AS paths @@ -2424,8 +2422,6 @@ SELECT file_path_history('src/Storages/StorageReplicatedMergeTree.cpp') AS paths We can use this capability to now assemble the commits for the entire history of a file. In this example, we show one commit for each of the `path` values. 
-[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICB0aW1lLAogICAgc3Vic3RyaW5nKGNvbW1pdF9oYXNoLCAxLCAxMSkgQVMgY29tbWl0LAogICAgY2hhbmdlX3R5cGUsCiAgICBhdXRob3IsCiAgICBwYXRoLAogICAgY29tbWl0X21lc3NhZ2UKRlJPTSBnaXRfY2xpY2tob3VzZS5maWxlX2NoYW5nZXMKV0hFUkUgcGF0aCBJTiBmaWxlX3BhdGhfaGlzdG9yeSgnc3JjL1N0b3JhZ2VzL1N0b3JhZ2VSZXBsaWNhdGVkTWVyZ2VUcmVlLmNwcCcpCk9SREVSIEJZIHRpbWUgREVTQwpMSU1JVCAxIEJZIHBhdGgKRk9STUFUIFByZXR0eUNvbXBhY3RNb25vQmxvY2s=) - ```sql SELECT time, @@ -2457,8 +2453,6 @@ This is particularly difficult to get an exact result due to the inability to cu An approximate solution, sufficient for a high-level analysis, may look something like this: -[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBsaW5lX251bWJlcl9uZXcsCiAgICBhcmdNYXgoYXV0aG9yLCB0aW1lKSwKICAgIGFyZ01heChsaW5lLCB0aW1lKQpGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwpXSEVSRSBwYXRoIElOIGZpbGVfcGF0aF9oaXN0b3J5KCdzcmMvU3RvcmFnZXMvU3RvcmFnZVJlcGxpY2F0ZWRNZXJnZVRyZWUuY3BwJykKR1JPVVAgQlkgbGluZV9udW1iZXJfbmV3Ck9SREVSIEJZIGxpbmVfbnVtYmVyX25ldyBBU0MKTElNSVQgMjA=) - ```sql SELECT line_number_new, diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index 5a35c1d45bc..a364085eeeb 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -354,4 +354,4 @@ At least they have caviar with vodka. Very nice. ## Online Playground {#playground} -The data is uploaded to ClickHouse Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==). 
+The data is uploaded to ClickHouse Playground, [example](https://sql.clickhouse.com?query_id=KB5KQJJFNBKHE5GBUJCP1B). diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 9efa1afb5c4..5e1f7c9c97f 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -386,7 +386,7 @@ ORDER BY c DESC LIMIT 10; ``` -You can also play with the data in Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIERheU9mV2VlaywgY291bnQoKikgQVMgYwpGUk9NIG9udGltZQpXSEVSRSBZZWFyPj0yMDAwIEFORCBZZWFyPD0yMDA4CkdST1VQIEJZIERheU9mV2VlawpPUkRFUiBCWSBjIERFU0M7Cg==). +You can also play with the data in Playground, [example](https://sql.clickhouse.com?query_id=M4FSVBVMSHY98NKCQP8N4K). This performance test was created by Vadim Tkachenko. See: diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index c0b4d96725d..22f88ce274a 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -417,4 +417,4 @@ Result: ### Online Playground {#playground} -You can test other queries to this data set using the interactive resource [Online Playground](https://play.clickhouse.com/play?user=play). For example, [like this](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). However, please note that you cannot create temporary tables here. +You can test other queries to this data set using the interactive resource [Online Playground](https://sql.clickhouse.com). 
For example, [like this](https://sql.clickhouse.com?query_id=BIPDVQNIGVEZFQYFEFQB7O). However, please note that you cannot create temporary tables here. diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index a8808e376e0..78520a34248 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -335,4 +335,4 @@ Result: ### Online Playground -The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). +The dataset is also available in the [Online Playground](https://sql.clickhouse.com?query_id=HQXNQZE26Z1QWYP9KC76ML). diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index 8ed79c3986f..edc9b0956a9 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -447,4 +447,4 @@ With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand r ### Test it in the Playground {#playground} -The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==). +The dataset is also available in the [Online Playground](https://sql.clickhouse.com?query_id=TRCWH5ZETY4SEEK8ISCCAX). 
diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index 6a6d4092177..80b6f9a9889 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -8,7 +8,7 @@ slug: /en/getting-started/playground # ClickHouse Playground -[ClickHouse Playground](https://play.clickhouse.com/play?user=play) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. +[ClickHouse Playground](https://sql.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. Several example datasets are available in Playground. You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../integrations/index.mdx). diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 48078197309..9f3a48dfa5a 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -33,7 +33,7 @@ The tags or attributes are saved as two parallel arrays, containing the keys and ## Log-query-settings -ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance. +The [log_query_settings](settings/settings.md) setting allows logging changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log.
This feature is particularly useful in production environments for tracking configuration changes that may affect query performance. ## Integration with monitoring systems diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 79407d46ce0..b6238487725 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1488,6 +1488,8 @@ Keys: - `formatting` – Log format for console output. Currently, only `json` is supported). - `use_syslog` - Also forward log output to syslog. - `syslog_level` - Log level for logging to syslog. +- `message_regexp` - Only log messages that match this regular expression. Defaults to `""`, indicating no filtering. +- `message_regexp_negative` - Only log messages that don't match this regular expression. Defaults to `""`, indicating no filtering. **Log format specifiers** @@ -1576,6 +1578,28 @@ The log level of individual log names can be overridden. For example, to mute al ``` +**Regular Expression Filtering** + +The messages logged can be filtered using regular expressions using `message_regexp` and `message_regexp_negative`. This can be done on a per-level basis or globally. If both a global and logger-specific pattern is specified, the global pattern is overridden (ignored) and only the logger-specific pattern applies. The positive and negative patterns are considered independently for this situation. Note: Using this feature may cause a slight slowdown in performance. 
+ + +```xml + + trace + + .*Trace.* + + + + + executeQuery + .*Read.* + .*from.* + + + +``` + ### syslog To write log messages additionally to syslog: diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 2fd34c4067c..45c4cdf9458 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1079,6 +1079,8 @@ Possible values: Default value: 0 bytes. +Note that if both `min_free_disk_bytes_to_perform_insert` and `min_free_disk_ratio_to_perform_insert` are specified, ClickHouse will rely on the value that allows inserts only when the larger amount of free disk space is available. + ## min_free_disk_ratio_to_perform_insert The minimum free to total disk space ratio to perform an `INSERT`. Must be a floating point value between 0 and 1. Note that this setting: diff --git a/docs/en/operations/system-tables/azure_queue_settings.md b/docs/en/operations/system-tables/azure_queue_settings.md new file mode 100644 index 00000000000..89235691110 --- /dev/null +++ b/docs/en/operations/system-tables/azure_queue_settings.md @@ -0,0 +1,20 @@ +--- +slug: /en/operations/system-tables/azure_queue_settings +--- +# azure_queue_settings + +Contains information about settings of [AzureQueue](../../engines/table-engines/integrations/azure-queue.md) tables. +Available from `24.10` server version. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description. +- `alterable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting can be changed via `ALTER TABLE ... MODIFY SETTING`. + - `0` — Current user can alter the setting. + - `1` — Current user can’t alter the setting. +- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value). diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 2ad2ae68ab5..f3cf013b4a0 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -13,10 +13,12 @@ The `system.part_log` table contains the following columns: - `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. - `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NewPart` — Inserting of a new data part. - - `MergeParts` — Merging of data parts. + - `MergePartsStart` — Merging of data parts has started. + - `MergeParts` — Merging of data parts has finished. - `DownloadPart` — Downloading a data part. - `RemovePart` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - - `MutatePart` — Mutating of a data part. + - `MutatePartStart` — Mutating of a data part has started. + - `MutatePart` — Mutating of a data part has finished. - `MovePart` — Moving the data part from the one disk to another one. - `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values: - `NotAMerge` — The current event has the type other than `MERGE_PARTS`.
diff --git a/docs/en/operations/system-tables/s3_queue_settings.md b/docs/en/operations/system-tables/s3_queue_settings.md new file mode 100644 index 00000000000..87e067b35fb --- /dev/null +++ b/docs/en/operations/system-tables/s3_queue_settings.md @@ -0,0 +1,20 @@ +--- +slug: /en/operations/system-tables/s3_queue_settings +--- +# s3_queue_settings + +Contains information about settings of [S3Queue](../../engines/table-engines/integrations/s3queue.md) tables. +Available from `24.10` server version. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed. +- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description. +- `alterable` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the setting can be changed via `ALTER TABLE ... MODIFY SETTING`. + - `0` — Current user can alter the setting. + - `1` — Current user can’t alter the setting. +- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value). diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index ab44f545430..a9fc5712b4d 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -427,19 +427,6 @@ High compression levels are useful for asymmetric scenarios, like compress once, ZSTD_QAT is not available in ClickHouse Cloud.
::: -#### DEFLATE_QPL - -`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: - -- DEFLATE_QPL is disabled by default and can only be used after enabling configuration setting [enable_deflate_qpl_codec](../../../operations/settings/settings.md#enable_deflate_qpl_codec). -- DEFLATE_QPL requires a ClickHouse build compiled with SSE 4.2 instructions (by default, this is the case). Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. -- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. -- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with SSE 4.2 enabled. - -:::note -DEFLATE_QPL is not available in ClickHouse Cloud. -::: - ### Specialized Codecs These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate. 
diff --git a/docs/ru/development/contrib.md b/docs/ru/development/contrib.md index f3a88a2da0c..67da2b2a6bf 100644 --- a/docs/ru/development/contrib.md +++ b/docs/ru/development/contrib.md @@ -93,7 +93,7 @@ sidebar_label: "Используемые сторонние библиотеки SELECT library_name, license_type, license_path FROM system.licenses ORDER BY library_name COLLATE 'en'; ``` -[Пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIGxpYnJhcnlfbmFtZSwgbGljZW5zZV90eXBlLCBsaWNlbnNlX3BhdGggRlJPTSBzeXN0ZW0ubGljZW5zZXMgT1JERVIgQlkgbGlicmFyeV9uYW1lIENPTExBVEUgJ2VuJw==) +[Пример](https://sql.clickhouse.com?query_id=478GCPU7LRTSZJBNY3EJT3) ## Рекомендации по добавлению сторонних библиотек и поддержанию в них пользовательских изменений {#adding-third-party-libraries} diff --git a/docs/ru/getting-started/example-datasets/brown-benchmark.md b/docs/ru/getting-started/example-datasets/brown-benchmark.md index c830d639095..d37be9f48d5 100644 --- a/docs/ru/getting-started/example-datasets/brown-benchmark.md +++ b/docs/ru/getting-started/example-datasets/brown-benchmark.md @@ -412,4 +412,4 @@ ORDER BY yr, mo; ``` -Данные также доступны для работы с интерактивными запросами через [Playground](https://play.clickhouse.com/play?user=play), 
[пример](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). +Данные также доступны для работы с интерактивными запросами через [Playground](https://sql.clickhouse.com), [пример](https://sql.clickhouse.com?query_id=1MXMHASDLEQIP4P1D1STND). diff --git a/docs/ru/getting-started/example-datasets/cell-towers.md b/docs/ru/getting-started/example-datasets/cell-towers.md index cf1a02ae8f0..2f91bed1c04 100644 --- a/docs/ru/getting-started/example-datasets/cell-towers.md +++ b/docs/ru/getting-started/example-datasets/cell-towers.md @@ -126,4 +126,4 @@ SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://play.clickhouse.com/play?user=play). Например, [вот так](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. 
+Вы можете протестировать другие запросы с помощью интерактивного ресурса [Playground](https://sql.clickhouse.com). Например, [вот так](https://sql.clickhouse.com?query_id=UV8M4MAGS2PWAUOAYAAARM). Однако, обратите внимание, что здесь нельзя создавать временные таблицы. diff --git a/docs/ru/getting-started/example-datasets/recipes.md b/docs/ru/getting-started/example-datasets/recipes.md index b91fe3314ff..860d1ff450c 100644 --- a/docs/ru/getting-started/example-datasets/recipes.md +++ b/docs/ru/getting-started/example-datasets/recipes.md @@ -338,4 +338,4 @@ WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'; ### Online Playground -Этот набор данных доступен в [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==). +Этот набор данных доступен в [Online Playground](https://sql.clickhouse.com?query_id=HQXNQZE26Z1QWYP9KC76ML). diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index a2d5498fb9a..b4ec89784ac 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -6,7 +6,7 @@ sidebar_label: Playground # ClickHouse Playground {#clickhouse-playground} -[ClickHouse Playground](https://play.clickhouse.com/play?user=play) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера. +[ClickHouse Playground](https://sql.clickhouse.com) позволяет пользователям экспериментировать с ClickHouse, выполняя запросы мгновенно, без необходимости настройки сервера или кластера. В Playground доступны несколько примеров наборов данных. 
Вы можете выполнять запросы к Playground, используя любой HTTP-клиент, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), или настроить соединение, используя драйверы [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Дополнительную информацию о программных продуктах, поддерживающих ClickHouse, можно найти [здесь](../interfaces/index.md). diff --git a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx index 6db4982f50f..74bfeb58d6d 100644 --- a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx +++ b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx @@ -457,4 +457,4 @@ ORDER BY yr, mo; ``` -此数据集可在 [Playground](https://play.clickhouse.com/play?user=play) 中进行交互式的请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==). +此数据集可在 [Playground](https://sql.clickhouse.com) 中进行交互式的请求, [example](https://sql.clickhouse.com?query_id=1MXMHASDLEQIP4P1D1STND). 
diff --git a/docs/zh/getting-started/example-datasets/cell-towers.mdx b/docs/zh/getting-started/example-datasets/cell-towers.mdx index 9738680519a..b98e92c378a 100644 --- a/docs/zh/getting-started/example-datasets/cell-towers.mdx +++ b/docs/zh/getting-started/example-datasets/cell-towers.mdx @@ -228,5 +228,5 @@ WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) 1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) ``` -虽然不能创建临时表,但此数据集仍可在 [Playground](https://play.clickhouse.com/play?user=play) 中进行交互式的请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). +虽然不能创建临时表,但此数据集仍可在 [Playground](https://sql.clickhouse.com) 中进行交互式的请求, [example](https://sql.clickhouse.com?query_id=UV8M4MAGS2PWAUOAYAAARM). diff --git a/docs/zh/getting-started/example-datasets/menus.mdx b/docs/zh/getting-started/example-datasets/menus.mdx index 10e9f2bd318..33ec031c1ad 100644 --- a/docs/zh/getting-started/example-datasets/menus.mdx +++ b/docs/zh/getting-started/example-datasets/menus.mdx @@ -349,4 +349,4 @@ ORDER BY d ASC; ## 在线 Playground{#playground} -此数据集已经上传到了 ClickHouse Playground 中,[example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==)。 +此数据集已经上传到了 ClickHouse Playground 中,[example](https://sql.clickhouse.com?query_id=KB5KQJJFNBKHE5GBUJCP1B)。 diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index b79c02ab780..0116515b28f 100644 --- 
a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -413,4 +413,4 @@ ORDER BY k ASC; ### 在线 Playground {#playground} -你可以使用交互式资源 [Online Playground](https://play.clickhouse.com/play?user=play) 来尝试对此数据集的其他查询。 例如, [执行这个查询](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). 但是,请注意无法在 Playground 中创建临时表。 +你可以使用交互式资源 [Online Playground](https://sql.clickhouse.com) 来尝试对此数据集的其他查询。 例如, [执行这个查询](https://sql.clickhouse.com?query_id=BIPDVQNIGVEZFQYFEFQB7O). 但是,请注意无法在 Playground 中创建临时表。 diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx index b7f8fe8eafd..a7b3ddbe0da 100644 --- a/docs/zh/getting-started/example-datasets/recipes.mdx +++ b/docs/zh/getting-started/example-datasets/recipes.mdx @@ -334,6 +334,6 @@ WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake' ### 在线 Playground -此数据集也可在 [在线 Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==) 中体验。 +此数据集也可在 [在线 Playground](https://sql.clickhouse.com?query_id=HQXNQZE26Z1QWYP9KC76ML) 中体验。 [原文链接](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/) diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index 7d4c299b919..158ce08216c 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -447,4 +447,4 @@ With projection: 100 rows in set. Elapsed: 0.336 sec. 
Processed 17.32 thousand r ### 在 Playground 上测试{#playground} -也可以在 [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==) 上找到此数据集。 +也可以在 [Online Playground](https://sql.clickhouse.com?query_id=TRCWH5ZETY4SEEK8ISCCAX) 上找到此数据集。 diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 2874b307cee..5d8927d8a6c 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -6,7 +6,7 @@ sidebar_label: 体验平台 # ClickHouse Playground {#clickhouse-playground} -无需搭建服务或集群,[ClickHouse Playground](https://play.clickhouse.com/play?user=play)允许人们通过执行查询语句立即体验ClickHouse,在Playground中我们提供了一些示例数据集。 +无需搭建服务或集群,[ClickHouse Playground](https://sql.clickhouse.com)允许人们通过执行查询语句立即体验ClickHouse,在Playground中我们提供了一些示例数据集。 你可以使用任意HTTP客户端向Playground提交查询语句,比如[curl](https://curl.haxx.se)或者[wget](https://www.gnu.org/software/wget/),也可以通过[JDBC](../interfaces/jdbc.md)或者[ODBC](../interfaces/odbc.md)驱动建立连接,更多信息详见[客户端](../interfaces/index.md)。 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index ffb029404d3..4aab7fcae14 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1160,6 +1160,9 @@ void Client::processOptions(const OptionsDescription & options_description, /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) client_context = global_context; initClientContext(); + + /// Allow to pass-through unknown settings to the server. 
+ client_context->getAccessControl().allowAllSettings(); } diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index 050bb495024..819f16cfd64 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -80,7 +80,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ("block-size,b", po::value()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "compress in blocks of specified size") ("hc", "use LZ4HC instead of LZ4") ("zstd", "use ZSTD instead of LZ4") - ("deflate_qpl", "use deflate_qpl instead of LZ4") ("codec", po::value>()->multitoken(), "use codecs combination instead of LZ4") ("level", po::value(), "compression level for codecs specified via flags") ("none", "use no compression instead of LZ4") @@ -107,7 +106,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) bool decompress = options.count("decompress"); bool use_lz4hc = options.count("hc"); bool use_zstd = options.count("zstd"); - bool use_deflate_qpl = options.count("deflate_qpl"); bool stat_mode = options.count("stat"); bool use_none = options.count("none"); print_stacktrace = options.count("stacktrace"); @@ -116,7 +114,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) if (options.count("codec")) codecs = options["codec"].as>(); - if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty()) + if ((use_lz4hc || use_zstd || use_none) && !codecs.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, codec flags like --zstd and --codec options are mutually exclusive"); if (!codecs.empty() && options.count("level")) @@ -128,8 +126,6 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) method_family = "LZ4HC"; else if (use_zstd) method_family = "ZSTD"; - else if (use_deflate_qpl) - method_family = "DEFLATE_QPL"; else if (use_none) method_family = "NONE"; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 15585ac8d57..35dae614d87 100644 --- 
a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2082,7 +2082,7 @@ try auto & access_control = global_context->getAccessControl(); try { - access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/programs/server/merges.html b/programs/server/merges.html new file mode 100644 index 00000000000..119fb058b0b --- /dev/null +++ b/programs/server/merges.html @@ -0,0 +1,441 @@ + + + + + ClickHouse Merges Visualizer + + + + +
+
+
+ + +
+ + + 10x + 0000-00-00 00:00:00 + +
+
+
+
+ + + diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 1b397304a06..e8ee363be1a 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -282,7 +282,7 @@ void AccessControl::shutdown() } -void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, +void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) { if (config_.has("custom_settings_prefixes")) @@ -869,4 +869,10 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const return *external_authenticators; } + +void AccessControl::allowAllSettings() +{ + custom_settings_prefixes->registerPrefixes({""}); +} + } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index cc1b7b2ca0d..a91686433ec 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -57,7 +57,7 @@ public: void shutdown() override; /// Initializes access storage (user directories). - void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, + void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); /// Parses access entities from a configuration loaded from users.xml. @@ -238,6 +238,9 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); + /// Allow all setting names - this can be used in clients to pass-through unknown settings to the server. 
+ void allowAllSettings(); + private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 4305568dd8b..cdf3dac192e 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -223,8 +223,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang }); } -template -bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) +template +bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) { Field current_value; bool has_current_value = current_settings.tryGet(change.name, current_value); @@ -234,12 +234,12 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel return false; if (throw_on_failure) - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); else { try { - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); } catch (...) 
{ diff --git a/src/AggregateFunctions/IAggregateFunction.cpp b/src/AggregateFunctions/IAggregateFunction.cpp index 7da341cc5b9..4b2394d0713 100644 --- a/src/AggregateFunctions/IAggregateFunction.cpp +++ b/src/AggregateFunctions/IAggregateFunction.cpp @@ -10,6 +10,15 @@ DataTypePtr IAggregateFunction::getStateType() const return std::make_shared(shared_from_this(), argument_types, parameters); } +DataTypePtr IAggregateFunction::getNormalizedStateType() const +{ + DataTypes normalized_argument_types; + normalized_argument_types.reserve(argument_types.size()); + for (const auto & arg : argument_types) + normalized_argument_types.emplace_back(arg->getNormalizedType()); + return std::make_shared(shared_from_this(), normalized_argument_types, parameters); +} + String IAggregateFunction::getDescription() const { String description; diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index f8e7051d635..4f1f5388032 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -73,7 +73,7 @@ public: virtual DataTypePtr getStateType() const; /// Same as the above but normalize state types so that variants with the same binary representation will use the same type. - virtual DataTypePtr getNormalizedStateType() const { return getStateType(); } + virtual DataTypePtr getNormalizedStateType() const; /// Returns true if two aggregate functions have the same state representation in memory and the same serialization, /// so state of one aggregate function can be safely used with another. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 22d20fc82ce..39499cc577d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -359,12 +359,6 @@ set_source_files_properties( Columns/ColumnString.cpp PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") -if (ENABLE_QPL) - set_source_files_properties( - Compression/CompressionCodecDeflateQpl.cpp - PROPERTIES COMPILE_FLAGS "-mwaitpkg") -endif () - target_link_libraries(clickhouse_common_io PUBLIC boost::program_options @@ -591,15 +585,8 @@ endif () target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::lz4) -if (TARGET ch_contrib::qpl) - dbms_target_link_libraries(PUBLIC ch_contrib::qpl) - target_link_libraries (clickhouse_compression PUBLIC ch_contrib::qpl) - target_link_libraries (clickhouse_compression PUBLIC ch_contrib::accel-config) -endif () - -if (TARGET ch_contrib::accel-config AND TARGET ch_contrib::qatzstd_plugin) +if (TARGET ch_contrib::qatzstd_plugin) dbms_target_link_libraries(PUBLIC ch_contrib::qatzstd_plugin) - dbms_target_link_libraries(PUBLIC ch_contrib::accel-config) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::qatzstd_plugin) endif () diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 8f7cced73ef..23aa7e841cb 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -1995,7 +1996,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { /// Temporarily apply query settings to context. 
- std::optional old_settings; + Settings old_settings = client_context->getSettingsRef(); SCOPE_EXIT_SAFE({ try { @@ -2012,45 +2013,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin have_error = true; } } - if (old_settings) - client_context->setSettings(*old_settings); + client_context->setSettings(old_settings); }); - - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(client_context->getSettingsRef()); - client_context->applySettingsChanges(settings_ast.as()->changes); - client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. 
- const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 416bb2f0b15..667db913630 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -55,7 +55,6 @@ namespace Setting { extern const SettingsBool allow_experimental_codecs; extern const SettingsBool allow_suspicious_codecs; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsString network_compression_method; extern const SettingsInt64 network_zstd_compression_level; @@ -811,7 +810,6 @@ void Connection::sendQuery( level, !(*settings)[Setting::allow_suspicious_codecs], (*settings)[Setting::allow_experimental_codecs], - (*settings)[Setting::enable_deflate_qpl_codec], (*settings)[Setting::enable_zstd_qat_codec]); compression_codec = CompressionCodecFactory::instance().get(method, level); } diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 05a257de2e2..f4374a0f2ca 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -67,19 +67,6 @@ struct HashTableNoState }; -/// These functions can be overloaded for custom types. 
-namespace ZeroTraits -{ - -template -bool check(const T x) { return x == T{}; } - -template -void set(T & x) { x = T{}; } - -} - - /** Numbers are compared bitwise. * Complex types are compared by operator== as usual (this is important if there are gaps). * @@ -87,18 +74,32 @@ void set(T & x) { x = T{}; } * Otherwise the invariants in hash table probing do not met when NaNs are present. */ template -inline bool bitEquals(T && a, T && b) +inline bool bitEquals(T a, T b) { - using RealT = std::decay_t; - - if constexpr (std::is_floating_point_v) - /// Note that memcmp with constant size is compiler builtin. - return 0 == memcmp(&a, &b, sizeof(RealT)); /// NOLINT + if constexpr (std::is_floating_point_v) + /// Note that memcmp with constant size is a compiler builtin. + return 0 == memcmp(&a, &b, sizeof(T)); /// NOLINT else return a == b; } +/// These functions can be overloaded for custom types. +namespace ZeroTraits +{ + +template +bool check(const T x) +{ + return bitEquals(x, T{}); +} + +template +void set(T & x) { x = T{}; } + +} + + /** * getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and * emplace() methods of HashTable. Must not be called for a null LookupResult. diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 414e3bef592..3a102238fbe 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -231,6 +231,7 @@ M(LoadedMarksMemoryBytes, "Size of in-memory representations of loaded marks.", ValueType::Bytes) \ \ M(Merge, "Number of launched background merges.", ValueType::Number) \ + M(MergeSourceParts, "Number of source parts scheduled for merges.", ValueType::Number) \ M(MergedRows, "Rows read for background merges. 
This is the number of rows before merge.", ValueType::Number) \ M(MergedColumns, "Number of columns merged during the horizontal stage of merges.", ValueType::Number) \ M(GatheredColumns, "Number of columns gathered during the vertical stage of merges.", ValueType::Number) \ diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 86ac054a62c..9d80e9845f4 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -32,7 +32,6 @@ #cmakedefine01 USE_IDNA #cmakedefine01 USE_NLP #cmakedefine01 USE_VECTORSCAN -#cmakedefine01 USE_QPL #cmakedefine01 USE_QATLIB #cmakedefine01 USE_LIBURING #cmakedefine01 USE_AVRO diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 2b65f2d690c..22f19139a5f 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -317,18 +317,6 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_d codec->decompress(compressed_buffer, static_cast(size_compressed_without_checksum), to.begin()); } -void CompressedReadBufferBase::flushAsynchronousDecompressRequests() const -{ - if (codec) - codec->flushAsynchronousDecompressRequests(); -} - -void CompressedReadBufferBase::setDecompressMode(ICompressionCodec::CodecMode mode) const -{ - if (codec) - codec->setDecompressMode(mode); -} - /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. 
CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_, bool external_data_) : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_), external_data(external_data_) diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index 4a164a6ce68..b15d05f7e80 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -64,14 +64,6 @@ protected: /// It is more efficient for compression codec NONE but not suitable if you want to decompress into specific location. void decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum); - /// Flush all asynchronous decompress request. - void flushAsynchronousDecompressRequests() const; - - /// Set decompression mode: Synchronous/Asynchronous/SoftwareFallback. - /// The mode is "Synchronous" by default. - /// flushAsynchronousDecompressRequests must be called subsequently once set "Asynchronous" mode. - void setDecompressMode(ICompressionCodec::CodecMode mode) const; - public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. explicit CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false, bool external_data_ = false); diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 9dc40b8217c..0acfb9d3560 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -90,8 +90,6 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) { size_t bytes_read = 0; - /// The codec mode is only relevant for codecs which support hardware offloading. 
- ICompressionCodec::CodecMode decompress_mode = ICompressionCodec::CodecMode::Synchronous; bool read_tail = false; /// If there are unread bytes in the buffer, then we copy needed to `to`. @@ -104,28 +102,10 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) size_t size_decompressed = 0; size_t size_compressed_without_checksum = 0; - ///Try to read block which is entirely located in a single 'compressed_in->' buffer. - size_t new_size_compressed = readCompressedDataBlockForAsynchronous(size_decompressed, size_compressed_without_checksum); - - if (new_size_compressed) - { - /// Current block is entirely located in a single 'compressed_in->' buffer. - /// We can set asynchronous decompression mode if supported to boost performance. - decompress_mode = ICompressionCodec::CodecMode::Asynchronous; - } - else - { - /// Current block cannot be decompressed asynchronously, means it probably span across two compressed_in buffers. - /// Meanwhile, asynchronous requests for previous blocks should be flushed if any. - flushAsynchronousDecompressRequests(); - /// Fallback to generic API - new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); - decompress_mode = ICompressionCodec::CodecMode::Synchronous; - } - size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer. - + size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false); if (!new_size_compressed) break; + size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer. auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer(); @@ -133,7 +113,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) /// need to skip some bytes in decompressed data (seek happened before readBig call). 
if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read) { - setDecompressMode(decompress_mode); decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum); bytes_read += size_decompressed; bytes += size_decompressed; @@ -148,8 +127,6 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - /// Synchronous mode must be set since we need read partial data immediately from working buffer to target buffer. - setDecompressMode(ICompressionCodec::CodecMode::Synchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); /// Read partial data from first block. Won't run here at second block. @@ -168,17 +145,12 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n) assert(size_decompressed + additional_size_at_the_end_of_buffer > 0); memory.resize(size_decompressed + additional_size_at_the_end_of_buffer); working_buffer = Buffer(memory.data(), &memory[size_decompressed]); - // Asynchronous mode can be set here because working_buffer wouldn't be overwritten any more since this is the last block. - setDecompressMode(ICompressionCodec::CodecMode::Asynchronous); decompress(working_buffer, size_decompressed, size_compressed_without_checksum); read_tail = true; break; } } - /// Here we must make sure all asynchronous requests above are completely done. 
- flushAsynchronousDecompressRequests(); - if (read_tail) { /// Manually take nextimpl_working_buffer_offset into account, because we don't use diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp deleted file mode 100644 index 30085762c00..00000000000 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ /dev/null @@ -1,490 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_QPL - -#include "libaccel_config.h" - -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int CANNOT_COMPRESS; - extern const int CANNOT_DECOMPRESS; -} - -DeflateQplJobHWPool & DeflateQplJobHWPool::instance() -{ - static DeflateQplJobHWPool pool; - return pool; -} - -DeflateQplJobHWPool::DeflateQplJobHWPool() - : max_hw_jobs(0) - , random_engine(randomSeed()) -{ - LoggerPtr log = getLogger("DeflateQplJobHWPool"); - const char * qpl_version = qpl_get_library_version(); - - // loop all configured workqueue size to get maximum job number. - accfg_ctx * ctx_ptr = nullptr; - auto ctx_status = accfg_new(&ctx_ptr); - SCOPE_EXIT({ accfg_unref(ctx_ptr); }); - if (ctx_status == 0) - { - auto * dev_ptr = accfg_device_get_first(ctx_ptr); - while (dev_ptr != nullptr) - { - for (auto * wq_ptr = accfg_wq_get_first(dev_ptr); wq_ptr != nullptr; wq_ptr = accfg_wq_get_next(wq_ptr)) - max_hw_jobs += accfg_wq_get_size(wq_ptr); - dev_ptr = accfg_device_get_next(dev_ptr); - } - } - else - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Failed to create new libaccel_config context -> status: {}, QPL Version: {}.", ctx_status, qpl_version); - return; - } - - if (max_hw_jobs == 0) - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. 
Failed to get available workqueue size -> total_wq_size: {}, QPL Version: {}.", max_hw_jobs, qpl_version); - return; - } - distribution = std::uniform_int_distribution(0, max_hw_jobs - 1); - /// Get size required for saving a single qpl job object - qpl_get_job_size(qpl_path_hardware, &per_job_size); - /// Allocate job buffer pool for storing all job objects - hw_jobs_buffer = std::make_unique(per_job_size * max_hw_jobs); - hw_job_ptr_locks = std::make_unique(max_hw_jobs); - /// Initialize all job objects in job buffer pool - for (UInt32 index = 0; index < max_hw_jobs; ++index) - { - qpl_job * job_ptr = reinterpret_cast(hw_jobs_buffer.get() + index * per_job_size); - if (auto status = qpl_init_job(qpl_path_hardware, job_ptr); status != QPL_STS_OK) - { - job_pool_ready = false; - LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Failed to Initialize qpl job -> status: {}, QPL Version: {}.", static_cast(status), qpl_version); - return; - } - unLockJob(index); - } - - job_pool_ready = true; - LOG_DEBUG(log, "Hardware-assisted DeflateQpl codec is ready! 
QPL Version: {}, max_hw_jobs: {}",qpl_version, max_hw_jobs); -} - -DeflateQplJobHWPool::~DeflateQplJobHWPool() -{ - for (UInt32 i = 0; i < max_hw_jobs; ++i) - { - qpl_job * job_ptr = reinterpret_cast(hw_jobs_buffer.get() + i * per_job_size); - while (!tryLockJob(i)); - qpl_fini_job(job_ptr); - unLockJob(i); - } - job_pool_ready = false; -} - -qpl_job * DeflateQplJobHWPool::acquireJob(UInt32 & job_id) -{ - if (isJobPoolReady()) - { - UInt32 retry = 0; - UInt32 index = distribution(random_engine); - while (!tryLockJob(index)) - { - index = distribution(random_engine); - retry++; - if (retry > max_hw_jobs) - { - return nullptr; - } - } - job_id = max_hw_jobs - index; - assert(index < max_hw_jobs); - return reinterpret_cast(hw_jobs_buffer.get() + index * per_job_size); - } - return nullptr; -} - -void DeflateQplJobHWPool::releaseJob(UInt32 job_id) -{ - if (isJobPoolReady()) - unLockJob(max_hw_jobs - job_id); -} - -bool DeflateQplJobHWPool::tryLockJob(UInt32 index) -{ - bool expected = false; - assert(index < max_hw_jobs); - return hw_job_ptr_locks[index].compare_exchange_strong(expected, true); -} - -void DeflateQplJobHWPool::unLockJob(UInt32 index) -{ - assert(index < max_hw_jobs); - hw_job_ptr_locks[index].store(false); -} - -HardwareCodecDeflateQpl::HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_) - : log(getLogger("HardwareCodecDeflateQpl")) - , sw_codec(sw_codec_) -{ -} - -HardwareCodecDeflateQpl::~HardwareCodecDeflateQpl() -{ -#ifndef NDEBUG - assert(decomp_async_job_map.empty()); -#else - if (!decomp_async_job_map.empty()) - { - LOG_WARNING(log, "Find un-released job when HardwareCodecDeflateQpl destroy"); - for (auto it : decomp_async_job_map) - { - DeflateQplJobHWPool::instance().releaseJob(it.first); - } - decomp_async_job_map.clear(); - } -#endif -} - -Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - UInt32 
compressed_size = 0; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - job_ptr->op = qpl_op_compress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->level = qpl_default_level; - job_ptr->available_out = dest_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; - - auto status = qpl_execute_job(job_ptr); - if (status == QPL_STS_OK) - { - compressed_size = job_ptr->total_out; - DeflateQplJobHWPool::instance().releaseJob(job_id); - return compressed_size; - } - - LOG_WARNING( - log, - "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->qpl_execute_job with error code: {} - please " - "refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - DeflateQplJobHWPool::instance().releaseJob(job_id); - return RET_ERROR; -} - -Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - UInt32 decompressed_size = 0; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. 
(Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - auto status = qpl_submit_job(job_ptr); - if (status != QPL_STS_OK) - { - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - return RET_ERROR; - } - /// Busy waiting till job complete. - do - { - _tpause(1, __rdtsc() + 1000); - status = qpl_check_job(job_ptr); - } while (status == QPL_STS_BEING_PROCESSED); - - if (status != QPL_STS_OK) - { - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - return RET_ERROR; - } - - decompressed_size = job_ptr->total_out; - DeflateQplJobHWPool::instance().releaseJob(job_id); - return decompressed_size; -} - -Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) - { - LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. 
(Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)"); - return RET_ERROR; - } - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - auto status = qpl_submit_job(job_ptr); - if (status == QPL_STS_OK) - { - decomp_async_job_map.insert({job_id, job_ptr}); - return job_id; - } - - DeflateQplJobHWPool::instance().releaseJob(job_id); - LOG_WARNING( - log, - "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->qpl_submit_job with error code: {} " - "- please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - return RET_ERROR; -} - -void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests() -{ - auto n_jobs_processing = decomp_async_job_map.size(); - std::map::iterator it = decomp_async_job_map.begin(); - - while (n_jobs_processing) - { - UInt32 job_id = 0; - qpl_job * job_ptr = nullptr; - job_id = it->first; - job_ptr = it->second; - - auto status = qpl_check_job(job_ptr); - if (status == QPL_STS_BEING_PROCESSED) - { - it++; - } - else - { - if (status != QPL_STS_OK) - { - sw_codec.doDecompressData( - reinterpret_cast(job_ptr->next_in_ptr), - job_ptr->available_in, - reinterpret_cast(job_ptr->next_out_ptr), - job_ptr->available_out); - LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. 
(Details: flushAsynchronousDecompressRequests with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast(status)); - } - it = decomp_async_job_map.erase(it); - DeflateQplJobHWPool::instance().releaseJob(job_id); - n_jobs_processing--; - if (n_jobs_processing <= 0) - break; - } - - if (it == decomp_async_job_map.end()) - { - it = decomp_async_job_map.begin(); - _tpause(1, __rdtsc() + 1000); - } - } -} - -SoftwareCodecDeflateQpl::~SoftwareCodecDeflateQpl() -{ - if (!sw_job) - qpl_fini_job(sw_job); -} - -qpl_job * SoftwareCodecDeflateQpl::getJobCodecPtr() -{ - if (!sw_job) - { - UInt32 size = 0; - qpl_get_job_size(qpl_path_software, &size); - - sw_buffer = std::make_unique(size); - sw_job = reinterpret_cast(sw_buffer.get()); - - // Job initialization - if (auto status = qpl_init_job(qpl_path_software, sw_job); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_COMPRESS, - "Initialization of DeflateQpl software fallback codec failed. " - "(Details: qpl_init_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - } - return sw_job; -} - -UInt32 SoftwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) -{ - qpl_job * job_ptr = getJobCodecPtr(); - // Performing a compression operation - job_ptr->op = qpl_op_compress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = dest_size; - job_ptr->level = qpl_default_level; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_DYNAMIC_HUFFMAN | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; - - if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_COMPRESS, - "Execution of DeflateQpl software fallback codec failed. 
" - "(Details: qpl_execute_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); - - return job_ptr->total_out; -} - -void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) -{ - qpl_job * job_ptr = getJobCodecPtr(); - - // Performing a decompression operation - job_ptr->op = qpl_op_decompress; - job_ptr->next_in_ptr = reinterpret_cast(const_cast(source)); - job_ptr->next_out_ptr = reinterpret_cast(dest); - job_ptr->available_in = source_size; - job_ptr->available_out = uncompressed_size; - job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; - - if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK) - throw Exception(ErrorCodes::CANNOT_DECOMPRESS, - "Execution of DeflateQpl software fallback codec failed. " - "(Details: qpl_execute_job with error code: " - "{} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", - static_cast(status)); -} - -CompressionCodecDeflateQpl::CompressionCodecDeflateQpl() - : sw_codec(std::make_unique()) - , hw_codec(std::make_unique(*sw_codec)) -{ - setCodecDescription("DEFLATE_QPL"); -} - -uint8_t CompressionCodecDeflateQpl::getMethodByte() const -{ - return static_cast(CompressionMethodByte::DeflateQpl); -} - -void CompressionCodecDeflateQpl::updateHash(SipHash & hash) const -{ - getCodecDesc()->updateTreeHash(hash, /*ignore_aliases=*/ true); -} - -UInt32 CompressionCodecDeflateQpl::getMaxCompressedDataSize(UInt32 uncompressed_size) const -{ - /// Aligned with ZLIB - return ((uncompressed_size) + ((uncompressed_size) >> 12) + ((uncompressed_size) >> 14) + ((uncompressed_size) >> 25) + 13); -} - -UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 source_size, char * dest) const -{ -/// QPL library is using AVX-512 with some shuffle operations. 
-/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. - __msan_unpoison(dest, getMaxCompressedDataSize(source_size)); - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - res = hw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - res = sw_codec->doCompressData(source, source_size, dest, getMaxCompressedDataSize(source_size)); - return res; -} - -inline void touchBufferWithZeroFilling(char * buffer, UInt32 buffer_size) -{ - for (char * p = buffer; p < buffer + buffer_size; p += ::getPageSize()/(sizeof(*p))) - { - *p = 0; - } -} - -void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const -{ -/// QPL library is using AVX-512 with some shuffle operations. -/// Memory sanitizer don't understand if there was uninitialized memory in SIMD register but it was not used in the result of shuffle. - __msan_unpoison(dest, uncompressed_size); -/// Device IOTLB miss has big perf. impact for IAA accelerators. -/// To avoid page fault, we need touch buffers related to accelerator in advance. 
- touchBufferWithZeroFilling(dest, uncompressed_size); - - switch (getDecompressMode()) - { - case CodecMode::Synchronous: - { - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - { - res = hw_codec->doDecompressDataSynchronous(source, source_size, dest, uncompressed_size); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - } - else - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } - case CodecMode::Asynchronous: - { - Int32 res = HardwareCodecDeflateQpl::RET_ERROR; - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - res = hw_codec->doDecompressDataAsynchronous(source, source_size, dest, uncompressed_size); - if (res == HardwareCodecDeflateQpl::RET_ERROR) - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } - case CodecMode::SoftwareFallback: - sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); - return; - } -} - -void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() -{ - if (DeflateQplJobHWPool::instance().isJobPoolReady()) - hw_codec->flushAsynchronousDecompressRequests(); - /// After flush previous all async requests, we must restore mode to be synchronous by default. 
- setDecompressMode(CodecMode::Synchronous); -} -void registerCodecDeflateQpl(CompressionCodecFactory & factory) -{ - factory.registerSimpleCompressionCodec( - "DEFLATE_QPL", static_cast(CompressionMethodByte::DeflateQpl), [&]() { return std::make_shared(); }); -} -} -#endif diff --git a/src/Compression/CompressionCodecDeflateQpl.h b/src/Compression/CompressionCodecDeflateQpl.h deleted file mode 100644 index d9abc0fb7e0..00000000000 --- a/src/Compression/CompressionCodecDeflateQpl.h +++ /dev/null @@ -1,125 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "config.h" - -#if USE_QPL - -#include - -namespace Poco -{ -class Logger; -} - -namespace DB -{ - -/// DeflateQplJobHWPool is resource pool to provide the job objects. -/// Job object is used for storing context information during offloading compression job to HW Accelerator. -class DeflateQplJobHWPool -{ -public: - DeflateQplJobHWPool(); - ~DeflateQplJobHWPool(); - - static DeflateQplJobHWPool & instance(); - - qpl_job * acquireJob(UInt32 & job_id); - void releaseJob(UInt32 job_id); - const bool & isJobPoolReady() const { return job_pool_ready; } - -private: - bool tryLockJob(UInt32 index); - void unLockJob(UInt32 index); - - /// size of each job objects - UInt32 per_job_size; - /// Maximum jobs running in parallel supported by IAA hardware - UInt32 max_hw_jobs; - /// Entire buffer for storing all job objects - std::unique_ptr hw_jobs_buffer; - /// Locks for accessing each job object pointers - std::unique_ptr hw_job_ptr_locks; - - bool job_pool_ready; - pcg64_fast random_engine; - std::uniform_int_distribution distribution; -}; - -class SoftwareCodecDeflateQpl -{ -public: - ~SoftwareCodecDeflateQpl(); - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size); - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - -private: - qpl_job * sw_job = nullptr; - std::unique_ptr sw_buffer; - - qpl_job * 
getJobCodecPtr(); -}; - -class HardwareCodecDeflateQpl -{ -public: - /// RET_ERROR stands for hardware codec fail, needs fallback to software codec. - static constexpr Int32 RET_ERROR = -1; - - explicit HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_); - ~HardwareCodecDeflateQpl(); - - Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const; - - /// Submit job request to the IAA hardware and then busy waiting till it complete. - Int32 doDecompressDataSynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - - /// Submit job request to the IAA hardware and return immediately. IAA hardware will process decompression jobs automatically. - Int32 doDecompressDataAsynchronous(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size); - - /// Flush result for all previous requests which means busy waiting till all the jobs in "decomp_async_job_map" are finished. - /// Must be called subsequently after several calls of doDecompressDataReq. - void flushAsynchronousDecompressRequests(); - -private: - /// Asynchronous job map for decompression: job ID - job object. - /// For each submission, push job ID && job object into this map; - /// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete. - std::map decomp_async_job_map; - LoggerPtr log; - /// Provides a fallback in case of errors. 
- SoftwareCodecDeflateQpl & sw_codec; -}; - -class CompressionCodecDeflateQpl final : public ICompressionCodec -{ -public: - CompressionCodecDeflateQpl(); - uint8_t getMethodByte() const override; - void updateHash(SipHash & hash) const override; - -protected: - bool isCompression() const override { return true; } - bool isGenericCompression() const override { return true; } - bool isDeflateQpl() const override { return true; } - - UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; - void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; - - /// Flush result for previous asynchronous decompression requests on asynchronous mode. - void flushAsynchronousDecompressRequests() override; - -private: - UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; - - std::unique_ptr sw_codec; - std::unique_ptr hw_codec; -}; - -} -#endif diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index fb4581f22b4..c8ad3d71376 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -176,9 +176,6 @@ void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecZSTDQAT(CompressionCodecFactory & factory); #endif void registerCodecMultiple(CompressionCodecFactory & factory); -#if USE_QPL -void registerCodecDeflateQpl(CompressionCodecFactory & factory); -#endif /// Keeper use only general-purpose codecs, so we don't need these special codecs /// in standalone build @@ -206,9 +203,6 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecGorilla(*this); registerCodecEncrypted(*this); registerCodecFPC(*this); -#if USE_QPL - registerCodecDeflateQpl(*this); -#endif registerCodecGCD(*this); default_codec = get("LZ4", {}); diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index 2885f35d7bd..64d454d3e86 100644 --- 
a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,10 +40,10 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const; /// Validate codecs AST specified by user - void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const; + void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const; /// Get codec by AST and possible column_type. 
Some codecs can use /// information about type to improve inner settings, but every codec should diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index a54169d4524..09eb2cf3844 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes void CompressionCodecFactory::validateCodec( - const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const + const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs, bool enable_zstd_qat_codec) const { if (family_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec name cannot be empty"); @@ -43,13 +43,13 @@ void CompressionCodecFactory::validateCodec( { auto literal = std::make_shared(static_cast(*level)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + {}, sanity_check, allow_experimental_codecs, enable_zstd_qat_codec); } else { auto identifier = std::make_shared(Poco::toUpper(family_name)); validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier), - {}, sanity_check, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + {}, sanity_check, allow_experimental_codecs, enable_zstd_qat_codec); } } @@ -77,7 +77,7 @@ bool innerDataTypeIsFloat(const DataTypePtr & type) } ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_deflate_qpl_codec, bool enable_zstd_qat_codec) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool 
enable_zstd_qat_codec) const { if (const auto * func = ast->as()) { @@ -159,12 +159,6 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( " You can enable it with the 'allow_experimental_codecs' setting.", codec_family_name); - if (!enable_deflate_qpl_codec && result_codec->isDeflateQpl()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Codec {} is disabled by default." - " You can enable it with the 'enable_deflate_qpl_codec' setting.", - codec_family_name); - if (!enable_zstd_qat_codec && result_codec->isZstdQat()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec {} is disabled by default." diff --git a/src/Compression/CompressionInfo.h b/src/Compression/CompressionInfo.h index ee4b3e38653..f01661cbe1d 100644 --- a/src/Compression/CompressionInfo.h +++ b/src/Compression/CompressionInfo.h @@ -46,7 +46,6 @@ enum class CompressionMethodByte : uint8_t AES_128_GCM_SIV = 0x96, AES_256_GCM_SIV = 0x97, FPC = 0x98, - DeflateQpl = 0x99, GCD = 0x9a, ZSTD_QPL = 0x9b, }; diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index f77b1323d2e..549817cb0b9 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -47,37 +47,9 @@ public: /// Decompress bytes from compressed source to dest. Dest should preallocate memory; UInt32 decompress(const char * source, UInt32 source_size, char * dest) const; - /// Three kinds of codec mode: - /// Synchronous mode which is commonly used by default; - /// --- For the codec with HW decompressor, it means submit request to HW and busy wait till complete. - /// Asynchronous mode which required HW decompressor support; - /// --- For the codec with HW decompressor, it means submit request to HW and return immediately. - /// --- Must be used in pair with flushAsynchronousDecompressRequests. - /// SoftwareFallback mode is exclusively defined for the codec with HW decompressor, enable its capability of "fallback to SW codec". 
- enum class CodecMode : uint8_t - { - Synchronous, - Asynchronous, - SoftwareFallback - }; - - /// Get current decompression mode - CodecMode getDecompressMode() const{ return decompressMode; } - - /// if set mode to CodecMode::Asynchronous, must be followed with flushAsynchronousDecompressRequests - void setDecompressMode(CodecMode mode) { decompressMode = mode; } - /// Report decompression errors as CANNOT_DECOMPRESS, not CORRUPTED_DATA void setExternalDataFlag() { decompression_error_code = ErrorCodes::CANNOT_DECOMPRESS; } - /// Flush result for previous asynchronous decompression requests. - /// This function must be called following several requests offload to HW. - /// To make sure asynchronous results have been flushed into target buffer completely. - /// Meanwhile, source and target buffer for decompression can not be overwritten until this function execute completely. - /// Otherwise it would conflict with HW offloading and cause exception. - /// For QPL deflate, it support the maximum number of requests equal to DeflateQplJobHWPool::jobPoolSize - virtual void flushAsynchronousDecompressRequests(){} - /// Number of bytes, that will be used to compress uncompressed_size bytes with current codec virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { @@ -118,9 +90,6 @@ public: /// It will not be allowed to use unless the user will turn off the safety switch. virtual bool isExperimental() const { return false; } - /// Is this the DEFLATE_QPL codec? - virtual bool isDeflateQpl() const { return false; } - /// Is this the ZSTD_QAT codec? 
virtual bool isZstdQat() const { return false; } @@ -147,7 +116,6 @@ protected: private: ASTPtr full_codec_desc; - CodecMode decompressMode{CodecMode::Synchronous}; }; using CompressionCodecPtr = std::shared_ptr; diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 201d0b47de0..b2f2dbb0b5f 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -17,52 +17,52 @@ namespace ErrorCodes /** These settings represent fine tunes for internal details of Coordination storages * and should not be changed by the user without a reason. */ -#define LIST_OF_COORDINATION_SETTINGS(M, ALIAS) \ - M(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \ - M(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \ - M(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \ - M(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \ - M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ - M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \ - M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \ - M(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. 
Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \ - M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \ - M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ - M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ - M(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \ - M(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \ - M(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \ - M(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \ - M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. 
Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ - M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \ - M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \ - M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \ - M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \ - M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \ - M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \ - M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \ - M(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \ - M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \ - M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \ - M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \ - M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \ - M(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \ - M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \ - M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \ - M(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. If possible, each created log file will preallocate this amount of bytes on disk. 
Set to 0 to disable the limit", 0) \ - M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \ - M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ - M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ - M(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \ - M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. 
Settings is disabled by default to not break backwards compatibility.", 0) \ - M(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \ - M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ - M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ - M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ - M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ - M(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ - M(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ - M(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) +#define LIST_OF_COORDINATION_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Milliseconds, min_session_timeout_ms, Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS, "Min client session timeout", 0) \ + DECLARE(Milliseconds, session_timeout_ms, Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS, "Max client session timeout", 0) \ + DECLARE(Milliseconds, operation_timeout_ms, Coordination::DEFAULT_OPERATION_TIMEOUT_MS, "Default client operation timeout", 0) \ + DECLARE(Milliseconds, dead_session_check_period_ms, 500, "How often leader will check sessions to consider them dead and remove", 0) \ + DECLARE(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ + DECLARE(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer 
(avoid too often leader elections)", 0) \ + DECLARE(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \ + DECLARE(Milliseconds, leadership_expiry_ms, 0, "Duration after which a leader will expire if it fails to receive responses from peers. Set it lower or equal to election_timeout_lower_bound_ms to avoid multiple leaders.", 0) \ + DECLARE(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \ + DECLARE(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ + DECLARE(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ + DECLARE(Milliseconds, shutdown_timeout, 5000, "How much time we will wait until RAFT shutdown", 0) \ + DECLARE(Milliseconds, session_shutdown_timeout, 10000, "How much time we will wait until sessions are closed during shutdown", 0) \ + DECLARE(Milliseconds, startup_timeout, 180000, "How much time we will wait until RAFT to start.", 0) \ + DECLARE(Milliseconds, sleep_before_leader_change_ms, 8000, "How much time we will wait before removing leader (so as leader could commit accepted but non-committed commands and they won't be lost -- leader removal is not synchronized with committing)", 0) \ + DECLARE(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. 
Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ + DECLARE(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \ + DECLARE(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \ + DECLARE(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \ + DECLARE(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \ + DECLARE(UInt64, max_request_queue_size, 100000, "Maximum number of requests that can be in queue for processing", 0) \ + DECLARE(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \ + DECLARE(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \ + DECLARE(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \ + DECLARE(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \ + DECLARE(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \ + DECLARE(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consensus with similar speed", 0) \ + DECLARE(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \ + DECLARE(Bool, compress_logs, false, "Write compressed coordination logs in ZSTD format", 0) \ + DECLARE(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \ + DECLARE(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \ + DECLARE(UInt64, max_log_file_size, 50 * 1024 * 1024, "Max size of the Raft log file. 
If possible, each created log file will preallocate this amount of bytes on disk. Set to 0 to disable the limit", 0) \ + DECLARE(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \ + DECLARE(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ + DECLARE(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ + DECLARE(UInt64, raft_limits_response_limit, 20, "Total wait time for a response is calculated by multiplying response_limit with heart_beat_interval_ms", 0) \ + DECLARE(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. 
Settings is disabled by default to not break backwards compatibility.", 0) \ + DECLARE(Bool, experimental_use_rocksdb, false, "Use rocksdb as backend storage", 0) \ + DECLARE(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ + DECLARE(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) \ + DECLARE(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ + DECLARE(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) \ + DECLARE(UInt64, log_slow_total_threshold_ms, 5000, "Requests for which the total latency is larger than this settings will be logged", 0) \ + DECLARE(UInt64, log_slow_cpu_threshold_ms, 100, "Requests for which the CPU (preprocessing and processing) latency is larger than this settings will be logged", 0) \ + DECLARE(UInt64, log_slow_connection_operation_threshold_ms, 1000, "Log message if a certain operation took too long inside a single connection", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/LoggerWrapper.h b/src/Coordination/LoggerWrapper.h index d08c42b6868..b7deabb9021 100644 --- a/src/Coordination/LoggerWrapper.h +++ b/src/Coordination/LoggerWrapper.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include namespace DB { diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h index 1bebf019e43..931c54e7109 100644 --- a/src/Core/BaseSettings.h +++ b/src/Core/BaseSettings.h @@ -22,6 +22,9 @@ class ReadBuffer; class WriteBuffer; /** Template class to define collections of settings. 
+ * If you create a new setting, please also add it to ./utils/check-style/check-settings-style + * for validation + * * Example of usage: * * mysettings.h: @@ -49,10 +52,10 @@ class WriteBuffer; * #include * #include * - * #define APPLY_FOR_MYSETTINGS(M) \ - * M(UInt64, a, 100, "Description of a", 0) \ - * M(Float, f, 3.11, "Description of f", IMPORTANT) // IMPORTANT - means the setting can't be ignored by older versions) \ - * M(String, s, "default", "Description of s", 0) + * #define APPLY_FOR_MYSETTINGS(DECLARE, ALIAS) \ + * DECLARE(UInt64, a, 100, "Description of a", 0) \ + * DECLARE(Float, f, 3.11, "Description of f", IMPORTANT) // IMPORTANT - means the setting can't be ignored by older versions) \ + * DECLARE(String, s, "default", "Description of s", 0) * * DECLARE_SETTINGS_TRAITS(MySettingsTraits, APPLY_FOR_MYSETTINGS) * IMPLEMENT_SETTINGS_TRAITS(MySettingsTraits, APPLY_FOR_MYSETTINGS) diff --git a/src/Core/FormatFactorySettings.cpp b/src/Core/FormatFactorySettings.cpp index 9735905c310..7c0569c4846 100644 --- a/src/Core/FormatFactorySettings.cpp +++ b/src/Core/FormatFactorySettings.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h index e7749e91fbb..a095bffc4c9 100644 --- a/src/Core/FormatFactorySettings.h +++ b/src/Core/FormatFactorySettings.h @@ -1,55 +1,1260 @@ #pragma once -#include -#include -#include -#include +/// This header exists so we can share it between multiple setting objects that include format settings -namespace DB +#include + +// clang-format off +#if defined(__CLION_IDE__) +/// CLion freezes for a minute every time it processes this +#define FORMAT_FACTORY_SETTINGS(M, ALIAS) +#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) +#else + +#define FORMAT_FACTORY_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Char, format_csv_delimiter, ',', R"( +The character to be considered as a delimiter in CSV data. 
If setting with a string, a string has to have a length of 1. +)", 0) \ + DECLARE(Bool, format_csv_allow_single_quotes, false, R"( +If it is set to true, allow strings in single quotes. +)", 0) \ + DECLARE(Bool, format_csv_allow_double_quotes, true, R"( +If it is set to true, allow strings in double quotes. +)", 0) \ + DECLARE(Bool, output_format_csv_serialize_tuple_into_separate_columns, true, R"( +If it is set to true, then Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost) +)", 0) \ + DECLARE(Bool, input_format_csv_deserialize_separate_columns_into_tuple, true, R"( +If it is set to true, then separate columns written in CSV format can be deserialized to Tuple column. +)", 0) \ + DECLARE(Bool, output_format_csv_crlf_end_of_line, false, R"( +If it is set to true, end of line in CSV format will be \\r\\n instead of \\n. +)", 0) \ + DECLARE(Bool, input_format_csv_allow_cr_end_of_line, false, R"( +If it is set to true, \\r will be allowed at end of line not followed by \\n +)", 0) \ + DECLARE(Bool, input_format_csv_enum_as_number, false, R"( +Treat inserted enum values in CSV formats as enum indices +)", 0) \ + DECLARE(Bool, input_format_csv_arrays_as_nested_csv, false, R"( +When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: \"[\"\"Hello\"\", \"\"world\"\", \"\"42\"\"\"\" TV\"\"]\". Braces around array can be omitted. +)", 0) \ + DECLARE(Bool, input_format_skip_unknown_fields, true, R"( +Enables or disables skipping insertion of extra data. + +When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. 
+ +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats) +- [BSONEachRow](../../interfaces/formats.md/#bsoneachrow) (and other JSON formats) +- [TSKV](../../interfaces/formats.md/#tskv) +- All formats with suffixes WithNames/WithNamesAndTypes +- [MySQLDump](../../interfaces/formats.md/#mysqldump) +- [Native](../../interfaces/formats.md/#native) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. +)", 0) \ + DECLARE(Bool, input_format_with_names_use_header, true, R"( +Enables or disables checking the column order when inserting data. + +To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. + +Supported formats: + +- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) +- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. 
+)", 0) \ + DECLARE(Bool, input_format_with_types_use_header, true, R"( +Controls whether format parser should check if data types from the input data match data types from the target table. + +Supported formats: + +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. +)", 0) \ + DECLARE(Bool, input_format_import_nested_json, false, R"( +Enables or disables the insertion of JSON data with nested objects. + +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +See also: + +- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. +)", 0) \ + DECLARE(Bool, input_format_defaults_for_omitted_fields, true, R"( +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. 
This option applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [Parquet](../../interfaces/formats.md/#parquet), [Arrow](../../interfaces/formats.md/#arrow), [Avro](../../interfaces/formats.md/#avro), [ORC](../../interfaces/formats.md/#orc), [Native](../../interfaces/formats.md/#native) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. + +:::note +When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +::: + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. +)", IMPORTANT) \ + DECLARE(Bool, input_format_csv_empty_as_default, true, R"( +Treat empty fields in CSV input as default values. +)", 0) \ + DECLARE(Bool, input_format_tsv_empty_as_default, false, R"( +Treat empty fields in TSV input as default values. +)", 0) \ + DECLARE(Bool, input_format_tsv_enum_as_number, false, R"( +Treat inserted enum values in TSV formats as enum indices. +)", 0) \ + DECLARE(Bool, input_format_null_as_default, true, R"( +Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). +If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. + +This setting is applicable for most input formats. + +For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Possible values: + +- 0 — Inserting `NULL` into a not nullable column causes an exception. 
+- 1 — `NULL` fields are initialized with default column values. +)", 0) \ + DECLARE(Bool, input_format_force_null_for_omitted_fields, false, R"( +Force initialize omitted fields with null values +)", 0) \ + DECLARE(Bool, input_format_arrow_case_insensitive_column_matching, false, R"( +Ignore case when matching Arrow columns with CH columns. +)", 0) \ + DECLARE(Int64, input_format_orc_row_batch_size, 100'000, R"( +Batch size when reading ORC stripes. +)", 0) \ + DECLARE(Bool, input_format_orc_case_insensitive_column_matching, false, R"( +Ignore case when matching ORC columns with CH columns. +)", 0) \ + DECLARE(Bool, input_format_parquet_case_insensitive_column_matching, false, R"( +Ignore case when matching Parquet columns with CH columns. +)", 0) \ + DECLARE(Bool, input_format_parquet_preserve_order, false, R"( +Avoid reordering rows when reading from Parquet files. Usually makes it much slower. +)", 0) \ + DECLARE(Bool, input_format_parquet_filter_push_down, true, R"( +When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata. +)", 0) \ + DECLARE(Bool, input_format_parquet_bloom_filter_push_down, false, R"( +When reading Parquet files, skip whole row groups based on the WHERE expressions and bloom filter in the Parquet metadata. +)", 0) \ + DECLARE(Bool, input_format_parquet_use_native_reader, false, R"( +When reading Parquet files, to use native reader instead of arrow reader. +)", 0) \ + DECLARE(Bool, input_format_allow_seeks, true, R"( +Allow seeks while reading in ORC/Parquet/Arrow input formats. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_orc_allow_missing_columns, true, R"( +Allow missing columns while reading ORC input formats +)", 0) \ + DECLARE(Bool, input_format_orc_use_fast_decoder, true, R"( +Use a faster ORC decoder implementation. 
+)", 0) \ + DECLARE(Bool, input_format_orc_filter_push_down, true, R"( +When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata. +)", 0) \ + DECLARE(String, input_format_orc_reader_time_zone_name, "GMT", R"( +The time zone name for ORC row reader, the default ORC row reader's time zone is GMT. +)", 0) \ + DECLARE(Bool, input_format_orc_dictionary_as_low_cardinality, true, R"( +Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files. +)", 0) \ + DECLARE(Bool, input_format_parquet_allow_missing_columns, true, R"( +Allow missing columns while reading Parquet input formats +)", 0) \ + DECLARE(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, R"( +Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format +)", 0) \ + DECLARE(Bool, input_format_parquet_enable_row_group_prefetch, true, R"( +Enable row group prefetching during parquet parsing. Currently, only single-threaded parsing can prefetch. 
+)", 0) \ + DECLARE(Bool, input_format_arrow_allow_missing_columns, true, R"( +Allow missing columns while reading Arrow input formats +)", 0) \ + DECLARE(Char, input_format_hive_text_fields_delimiter, '\x01', R"( +Delimiter between fields in Hive Text File +)", 0) \ + DECLARE(Char, input_format_hive_text_collection_items_delimiter, '\x02', R"( +Delimiter between collection(array or map) items in Hive Text File +)", 0) \ + DECLARE(Char, input_format_hive_text_map_keys_delimiter, '\x03', R"( +Delimiter between a pair of map key/values in Hive Text File +)", 0) \ + DECLARE(Bool, input_format_hive_text_allow_variable_number_of_columns, true, R"( +Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values +)", 0) \ + DECLARE(UInt64, input_format_msgpack_number_of_columns, 0, R"( +The number of columns in inserted MsgPack data. Used for automatic schema inference from data. +)", 0) \ + DECLARE(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, R"( +The way how to output UUID in MsgPack format. +)", 0) \ + DECLARE(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, R"( +The maximum rows of data to read for automatic schema inference. +)", 0) \ + DECLARE(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, R"( +The maximum amount of data in bytes to read for automatic schema inference. +)", 0) \ + DECLARE(Bool, input_format_csv_use_best_effort_in_schema_inference, true, R"( +Use some tweaks and heuristics to infer schema in CSV format +)", 0) \ + DECLARE(Bool, input_format_csv_try_infer_numbers_from_strings, false, R"( +If enabled, during schema inference ClickHouse will try to infer numbers from string fields. +It can be useful if CSV data contains quoted UInt64 numbers. + +Disabled by default. 
+)", 0) \ + DECLARE(Bool, input_format_csv_try_infer_strings_from_quoted_tuples, true, R"( +Interpret quoted tuples in the input data as a value of type String. +)", 0) \ + DECLARE(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, R"( +Use some tweaks and heuristics to infer schema in TSV format +)", 0) \ + DECLARE(Bool, input_format_csv_detect_header, true, R"( +Automatically detect header with names and types in CSV format +)", 0) \ + DECLARE(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, R"( +Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings +)", 0) \ + DECLARE(Bool, input_format_csv_trim_whitespaces, true, R"( +Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings +)", 0) \ + DECLARE(Bool, input_format_csv_use_default_on_bad_values, false, R"( +Allow to set default value to column when CSV field deserialization failed on bad value +)", 0) \ + DECLARE(Bool, input_format_csv_allow_variable_number_of_columns, false, R"( +Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values +)", 0) \ + DECLARE(Bool, input_format_tsv_allow_variable_number_of_columns, false, R"( +Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values +)", 0) \ + DECLARE(Bool, input_format_custom_allow_variable_number_of_columns, false, R"( +Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values +)", 0) \ + DECLARE(Bool, input_format_json_compact_allow_variable_number_of_columns, false, R"( +Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values +)", 0) \ + DECLARE(Bool, input_format_tsv_detect_header, true, R"( +Automatically detect header with names and types 
in TSV format +)", 0) \ + DECLARE(Bool, input_format_custom_detect_header, true, R"( +Automatically detect header with names and types in CustomSeparated format +)", 0) \ + DECLARE(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, R"( +Skip columns with unsupported types while schema inference for format Parquet +)", 0) \ + DECLARE(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, R"( +Max block size for parquet reader. +)", 0) \ + DECLARE(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, R"( +Average block bytes output by parquet reader +)", 0) \ + DECLARE(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, R"( +Skip fields with unsupported types while schema inference for format Protobuf +)", 0) \ + DECLARE(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, R"( +Skip columns with unsupported types while schema inference for format CapnProto +)", 0) \ + DECLARE(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, R"( +Skip columns with unsupported types while schema inference for format ORC +)", 0) \ + DECLARE(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, R"( +Skip columns with unsupported types while schema inference for format Arrow +)", 0) \ + DECLARE(String, column_names_for_schema_inference, "", R"( +The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' +)", 0) \ + DECLARE(String, schema_inference_hints, "", R"( +The list of column names and types to use as hints in schema inference for formats without schema. 
+ +Example: + +Query: +```sql +desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; +``` + +Result: +```sql +x UInt8 +y Nullable(String) +z IPv4 +``` + +:::note +If the `schema_inference_hints` is not formatted properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. +::: +)", 0) \ + DECLARE(SchemaInferenceMode, schema_inference_mode, "default", R"( +Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files +)", 0) \ + DECLARE(UInt64Auto, schema_inference_make_columns_nullable, 1, R"( +Controls making inferred types `Nullable` in schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. +)", 0) \ + DECLARE(Bool, input_format_json_read_bools_as_numbers, true, R"( +Allow parsing bools as numbers in JSON input formats. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_read_bools_as_strings, true, R"( +Allow parsing bools as strings in JSON input formats. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_try_infer_numbers_from_strings, false, R"( +If enabled, during schema inference ClickHouse will try to infer numbers from string fields. +It can be useful if JSON data contains quoted UInt64 numbers. + +Disabled by default. 
+)", 0) \ + DECLARE(Bool, input_format_json_validate_types_from_metadata, true, R"( +For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, +the types from metadata in input data will be compared with the types of the corresponding columns from the table. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_read_numbers_as_strings, true, R"( +Allow parsing numbers as strings in JSON input formats. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_read_objects_as_strings, true, R"( +Allow parsing JSON objects as strings in JSON input formats. + +Example: + +```sql +SET input_format_json_read_objects_as_strings = 1; +CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); +INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; +SELECT * FROM test; +``` + +Result: + +``` +┌─id─┬─obj──────────────────────┬───────date─┐ +│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ +└────┴──────────────────────────┴────────────┘ +``` + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_read_arrays_as_strings, true, R"( +Allow parsing JSON arrays as strings in JSON input formats. + +Example: + +```sql +SET input_format_json_read_arrays_as_strings = 1; +SELECT arr, toTypeName(arr), JSONExtractArrayRaw(arr)[3] from format(JSONEachRow, 'arr String', '{"arr" : [1, "Hello", [1,2,3]]}'); +``` + +Result: +``` +┌─arr───────────────────┬─toTypeName(arr)─┬─arrayElement(JSONExtractArrayRaw(arr), 3)─┐ +│ [1, "Hello", [1,2,3]] │ String │ [1,2,3] │ +└───────────────────────┴─────────────────┴───────────────────────────────────────────┘ +``` + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_try_infer_named_tuples_from_objects, true, R"( +If enabled, during schema inference ClickHouse will try to infer named Tuple from JSON objects. +The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data. 
+ +Example: + +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}') +``` + +Result: + +``` +┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │ │ │ │ │ │ +└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects, false, R"( +Use String type instead of an exception in case of ambiguous paths in JSON objects during named tuples inference +)", 0) \ + DECLARE(Bool, input_format_json_infer_incomplete_types_as_strings, true, R"( +Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference. +In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference +by using String type for keys with unknown types. 
+ +Example: + +```sql +SET input_format_json_infer_incomplete_types_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 1; +DESCRIBE format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); +SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); +``` + +Result: +``` +┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Tuple(a Array(Nullable(Int64)), b Nullable(String), c Nullable(String), d Nullable(String), e Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +┌─obj────────────────────────────┐ +│ ([1,2,3],'hello',NULL,'{}',[]) │ +└────────────────────────────────┘ +``` + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_named_tuples_as_objects, true, R"( +Parse named tuple columns as JSON objects. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, R"( +Ignore unknown keys in json object for named tuples. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, R"( +Insert default values for missing elements in JSON object while parsing named tuple. +This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_json_throw_on_bad_escape_sequence, true, R"( +Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data. + +Enabled by default. 
+)", 0) \ + DECLARE(Bool, input_format_json_ignore_unnecessary_fields, true, R"( +Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields +)", 0) \ + DECLARE(Bool, input_format_try_infer_variants, false, R"( +If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. +)", 0) \ + DECLARE(Bool, type_json_skip_duplicated_paths, false, R"( +When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception +)", 0) \ + DECLARE(UInt64, input_format_json_max_depth, 1000, R"( +Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely. +)", 0) \ + DECLARE(Bool, input_format_json_empty_as_default, false, R"( +Treat empty fields in JSON input as default values. +)", 0) \ + DECLARE(Bool, input_format_try_infer_integers, true, R"( +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_try_infer_dates, true, R"( +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. 
+)", 0) \ + DECLARE(Bool, input_format_try_infer_datetimes, true, R"( +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. +)", 0) \ + DECLARE(Bool, input_format_try_infer_datetimes_only_datetime64, false, R"( +When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types +)", 0) \ + DECLARE(Bool, input_format_try_infer_exponent_floats, false, R"( +Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred) +)", 0) \ + DECLARE(Bool, output_format_markdown_escape_special_characters, false, R"( +Escape special characters in Markdown +)", 0) \ + DECLARE(Bool, input_format_protobuf_flatten_google_wrappers, false, R"( +Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls +)", 0) \ + DECLARE(Bool, output_format_protobuf_nullables_with_google_wrappers, false, R"( +When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. 
If turned off, default and null values are not serialized +)", 0) \ + DECLARE(UInt64, input_format_csv_skip_first_lines, 0, R"( +Skip specified number of lines at the beginning of data in CSV format +)", 0) \ + DECLARE(UInt64, input_format_tsv_skip_first_lines, 0, R"( +Skip specified number of lines at the beginning of data in TSV format +)", 0) \ + DECLARE(Bool, input_format_csv_skip_trailing_empty_lines, false, R"( +Skip trailing empty lines in CSV format +)", 0) \ + DECLARE(Bool, input_format_tsv_skip_trailing_empty_lines, false, R"( +Skip trailing empty lines in TSV format +)", 0) \ + DECLARE(Bool, input_format_custom_skip_trailing_empty_lines, false, R"( +Skip trailing empty lines in CustomSeparated format +)", 0) \ + DECLARE(Bool, input_format_tsv_crlf_end_of_line, false, R"( +If it is set true, file function will read TSV format with \\r\\n instead of \\n. +)", 0) \ + \ + DECLARE(Bool, input_format_native_allow_types_conversion, true, R"( +Allow data types conversion in Native input format +)", 0) \ + DECLARE(Bool, input_format_native_decode_types_in_binary_format, false, R"( +Read data types in binary format instead of type names in Native input format +)", 0) \ + DECLARE(Bool, output_format_native_encode_types_in_binary_format, false, R"( +Write data types in binary format instead of type names in Native output format +)", 0) \ + DECLARE(Bool, output_format_native_write_json_as_string, false, R"( +Write data of [JSON](../../sql-reference/data-types/newjson.md) column as [String](../../sql-reference/data-types/string.md) column containing JSON strings instead of default native JSON serialization. +)", 0) \ + \ + DECLARE(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, R"( +Allows choosing a parser of the text representation of date and time. + +The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). 
+ +Possible values: + +- `'best_effort'` — Enables extended parsing. + + ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. + +- `'basic'` — Use basic parser. + + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. + +Cloud default value: `'best_effort'`. + +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +)", 0) \ + DECLARE(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, R"( +Allows choosing different output formats of the text representation of date and time. + +Possible values: + +- `simple` - Simple output format. + + ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. + +- `iso` - ISO output format. + + ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). + +- `unix_timestamp` - Unix timestamp output format. + + ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. + +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +)", 0) \ + DECLARE(IntervalOutputFormat, interval_output_format, FormatSettings::IntervalOutputFormat::Numeric, R"( +Allows choosing different output formats of the text representation of interval types. + +Possible values: + +- `kusto` - KQL-style output format. 
+ + ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (ie. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account. + +- `numeric` - Numeric output format. + + ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`. + +See also: + +- [Interval](../../sql-reference/data-types/special-data-types/interval.md) +)", 0) \ + \ + DECLARE(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"( +Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6], +corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \ + DECLARE(Bool, input_format_ipv4_default_on_conversion_error, false, R"( +Deserialization of IPv4 will use default values instead of throwing exception on conversion error. + +Disabled by default. +)", 0) \ + DECLARE(Bool, input_format_ipv6_default_on_conversion_error, false, R"( +Deserialization of IPV6 will use default values instead of throwing exception on conversion error. + +Disabled by default. +)", 0) \ + DECLARE(String, bool_true_representation, "true", R"( +Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. +)", 0) \ + DECLARE(String, bool_false_representation, "false", R"( +Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. +)", 0) \ + \ + DECLARE(Bool, input_format_values_interpret_expressions, true, R"( +For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. 
+)", 0) \ + DECLARE(Bool, input_format_values_deduce_templates_of_expressions, true, R"( +For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. +)", 0) \ + DECLARE(Bool, input_format_values_accurate_types_of_literals, true, R"( +For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. +)", 0) \ + DECLARE(Bool, input_format_avro_allow_missing_fields, false, R"( +For Avro/AvroConfluent format: when field is not found in schema use default value instead of error +)", 0) \ + /** This setting is obsolete and does nothing, left for compatibility reasons. */ \ + DECLARE(Bool, input_format_avro_null_as_default, false, R"( +For Avro/AvroConfluent format: insert default in case of null and non Nullable column +)", 0) \ + DECLARE(UInt64, format_binary_max_string_size, 1_GiB, R"( +The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit +)", 0) \ + DECLARE(UInt64, format_binary_max_array_size, 1_GiB, R"( +The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit +)", 0) \ + DECLARE(Bool, input_format_binary_decode_types_in_binary_format, false, R"( +Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format +)", 0) \ + DECLARE(Bool, output_format_binary_encode_types_in_binary_format, false, R"( +Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format +)", 0) \ + DECLARE(URI, format_avro_schema_registry_url, "", R"( +For AvroConfluent format: Confluent Schema Registry URL. 
+)", 0) \ + DECLARE(Bool, input_format_binary_read_json_as_string, false, R"( +Read values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary input format. +)", 0) \ + DECLARE(Bool, output_format_binary_write_json_as_string, false, R"( +Write values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary output format. +)", 0) \ + \ + DECLARE(Bool, output_format_json_quote_64bit_integers, true, R"( +Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. +Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. + +Possible values: + +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. +)", 0) \ + DECLARE(Bool, output_format_json_quote_denormals, false, R"str( +Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. 
+ +**Example** + +Consider the following table `account_orders`: + +```text +┌─id─┬─name───┬─duration─┬─period─┬─area─┐ +│ 1 │ Andrew │ 20 │ 0 │ 400 │ +│ 2 │ John │ 40 │ 0 │ 0 │ +│ 3 │ Bob │ 15 │ 0 │ -100 │ +└────┴────────┴──────────┴────────┴──────┘ +``` + +When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: + +```sql +SELECT area/period FROM account_orders FORMAT JSON; +``` + +```json { -struct FormatFactorySettingsImpl; -struct SettingChange; -class SettingsChanges; + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], -#define FORMAT_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ - M(CLASS_NAME, Bool) \ - M(CLASS_NAME, Char) \ - M(CLASS_NAME, Int64) \ - M(CLASS_NAME, UInt64) \ - M(CLASS_NAME, MsgPackUUIDRepresentation) \ - M(CLASS_NAME, SchemaInferenceMode) \ - M(CLASS_NAME, UInt64Auto) \ - M(CLASS_NAME, DateTimeInputFormat) \ - M(CLASS_NAME, DateTimeOutputFormat) \ - M(CLASS_NAME, IntervalOutputFormat) \ - M(CLASS_NAME, String) \ - M(CLASS_NAME, ParquetVersion) \ - M(CLASS_NAME, ParquetCompression) \ - M(CLASS_NAME, EscapingRule) \ - M(CLASS_NAME, ArrowCompression) \ - M(CLASS_NAME, CapnProtoEnumComparingMode) \ - M(CLASS_NAME, DateTimeOverflowBehavior) \ - M(CLASS_NAME, IdentifierQuotingStyle) + "data": + [ + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + } + ], -FORMAT_SETTINGS_SUPPORTED_TYPES(FormatFactorySettings, DECLARE_SETTING_TRAIT) - -struct FormatFactorySettings -{ - FormatFactorySettings(); - ~FormatFactorySettings(); - - FORMAT_SETTINGS_SUPPORTED_TYPES(FormatFactorySettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) - - /// General API as needed - bool tryGet(std::string_view name, Field & value) const; - Field get(std::string_view name) const; - void set(std::string_view name, const Field & value); - bool has(std::string_view name) const; - void applyChange(const SettingChange & change); - void applyChanges(const 
SettingsChanges & changes); - -private: - std::unique_ptr impl; -}; + "rows": 3, + "statistics": + { + "elapsed": 0.003648093, + "rows_read": 3, + "bytes_read": 24 + } } +``` + +When `output_format_json_quote_denormals = 1`, the query returns: + +```json +{ + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], + + "data": + [ + { + "divide(area, period)": "inf" + }, + { + "divide(area, period)": "-nan" + }, + { + "divide(area, period)": "-inf" + } + ], + + "rows": 3, + + "statistics": + { + "elapsed": 0.000070241, + "rows_read": 3, + "bytes_read": 24 + } +} +``` +)str", 0) \ + DECLARE(Bool, output_format_json_quote_decimals, false, R"( +Controls quoting of decimals in JSON output formats. + +Disabled by default. +)", 0) \ + DECLARE(Bool, output_format_json_quote_64bit_floats, false, R"( +Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. + +Disabled by default. +)", 0) \ + \ + DECLARE(Bool, output_format_json_escape_forward_slashes, true, R"( +Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. + +Enabled by default. +)", 0) \ + DECLARE(Bool, output_format_json_named_tuples_as_objects, true, R"( +Serialize named tuple columns as JSON objects. + +Enabled by default. +)", 0) \ + DECLARE(Bool, output_format_json_skip_null_value_in_named_tuples, false, R"( +Skip key value pairs with null value when serialize named tuple columns as JSON objects. It is only valid when output_format_json_named_tuples_as_objects is true. +)", 0) \ + DECLARE(Bool, output_format_json_array_of_rows, false, R"( +Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. + +Possible values: + +- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. 
+- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. + +**Example of a query with the enabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 1; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +[ +{"number":"0"}, +{"number":"1"}, +{"number":"2"} +] +``` + +**Example of a query with the disabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 0; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +{"number":"0"} +{"number":"1"} +{"number":"2"} +``` +)", 0) \ + DECLARE(Bool, output_format_json_validate_utf8, false, R"( +Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. + +Disabled by default. +)", 0) \ + \ + DECLARE(String, format_json_object_each_row_column_for_object_name, "", R"( +The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. +Column type should be String. If value is empty, default names `row_{i}`will be used for object names. + +### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns} + +Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + +### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters} + +When enabled, escape special characters in Markdown. + +[Common Mark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped by \: + +``` +! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ +``` + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. 
+ +### input_format_json_empty_as_default {#input_format_json_empty_as_default} + +When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. +)", 0) \ + \ + DECLARE(UInt64, output_format_pretty_max_rows, 10000, R"( +Rows limit for Pretty formats. +)", 0) \ + DECLARE(UInt64, output_format_pretty_max_column_pad_width, 250, R"( +Maximum width to pad all values in a column in Pretty formats. +)", 0) \ + DECLARE(UInt64, output_format_pretty_max_value_width, 10000, R"( +Maximum width of value to display in Pretty formats. If greater - it will be cut. +)", 0) \ + DECLARE(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, R"( +Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query. +)", 0) \ + DECLARE(UInt64Auto, output_format_pretty_color, "auto", R"( +Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal. +)", 0) \ + DECLARE(String, output_format_pretty_grid_charset, "UTF-8", R"( +Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one). +)", 0) \ + DECLARE(UInt64, output_format_pretty_display_footer_column_names, true, R"( +Display column names in the footer if there are many table rows. + +Possible values: + +- 0 — No column names are displayed in the footer. +- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default). 
+ +**Example** + +Query: + +```sql +SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000); +``` + +Result: + +```response + ┌─number─┬─toTypeName(number)─┐ + 1. │ 0 │ UInt64 │ + 2. │ 1 │ UInt64 │ + 3. │ 2 │ UInt64 │ + ... + 999. │ 998 │ UInt64 │ +1000. │ 999 │ UInt64 │ + └─number─┴─toTypeName(number)─┘ +``` +)", 0) \ + DECLARE(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, R"( +Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled. +)", 0) \ + DECLARE(UInt64, output_format_parquet_row_group_size, 1000000, R"( +Target row group size in rows. +)", 0) \ + DECLARE(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, R"( +Target row group size in bytes, before compression. +)", 0) \ + DECLARE(Bool, output_format_parquet_string_as_string, true, R"( +Use Parquet String type instead of Binary for String columns. +)", 0) \ + DECLARE(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, R"( +Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns. +)", 0) \ + DECLARE(ParquetVersion, output_format_parquet_version, "2.latest", R"( +Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default) +)", 0) \ + DECLARE(ParquetCompression, output_format_parquet_compression_method, "zstd", R"( +Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed) +)", 0) \ + DECLARE(Bool, output_format_parquet_compliant_nested_types, true, R"( +In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow. 
+)", 0) \ + DECLARE(Bool, output_format_parquet_use_custom_encoder, true, R"( +Use a faster Parquet encoder implementation. +)", 0) \ + DECLARE(Bool, output_format_parquet_parallel_encoding, true, R"( +Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder. +)", 0) \ + DECLARE(UInt64, output_format_parquet_data_page_size, 1024 * 1024, R"( +Target page size in bytes, before compression. +)", 0) \ + DECLARE(UInt64, output_format_parquet_batch_size, 1024, R"( +Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs. +)", 0) \ + DECLARE(Bool, output_format_parquet_write_page_index, true, R"( +Add a possibility to write page index into parquet files. +)", 0) \ + DECLARE(String, output_format_avro_codec, "", R"( +Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'. +)", 0) \ + DECLARE(UInt64, output_format_avro_sync_interval, 16 * 1024, R"( +Sync interval in bytes. +)", 0) \ + DECLARE(String, output_format_avro_string_column_pattern, "", R"( +For Avro format: regexp of String columns to select as AVRO string. +)", 0) \ + DECLARE(UInt64, output_format_avro_rows_in_file, 1, R"( +Max rows in a file (if permitted by storage) +)", 0) \ + DECLARE(Bool, output_format_tsv_crlf_end_of_line, false, R"( +If it is set true, end of line in TSV format will be \\r\\n instead of \\n. +)", 0) \ + DECLARE(String, format_csv_null_representation, "\\N", R"( +Custom NULL representation in CSV format +)", 0) \ + DECLARE(String, format_tsv_null_representation, "\\N", R"( +Custom NULL representation in TSV format +)", 0) \ + DECLARE(Bool, output_format_decimal_trailing_zeros, false, R"( +Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. + +Disabled by default. +)", 0) \ + \ + DECLARE(UInt64, input_format_allow_errors_num, 0, R"( +Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). 
+ +The default value is 0. + +Always pair it with `input_format_allow_errors_ratio`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. +)", 0) \ + DECLARE(Float, input_format_allow_errors_ratio, 0, R"( +Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). +The percentage of errors is set as a floating-point number between 0 and 1. + +The default value is 0. + +Always pair it with `input_format_allow_errors_num`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. +)", 0) \ + DECLARE(String, input_format_record_errors_file_path, "", R"( +Path of the file used to record errors while reading text formats (CSV, TSV). +)", 0) \ + DECLARE(String, errors_output_format, "CSV", R"( +Method to write Errors to text output. +)", 0) \ + \ + DECLARE(String, format_schema, "", R"( +This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. 
+)", 0) \ + DECLARE(String, format_template_resultset, "", R"( +Path to file which contains format string for result set (for Template format) +)", 0) \ + DECLARE(String, format_template_row, "", R"( +Path to file which contains format string for rows (for Template format) +)", 0) \ + DECLARE(String, format_template_row_format, "", R"( +Format string for rows (for Template format) +)", 0) \ + DECLARE(String, format_template_resultset_format, "", R"( +Format string for result set (for Template format) +)", 0) \ + DECLARE(String, format_template_rows_between_delimiter, "\n", R"( +Delimiter between rows (for Template format) +)", 0) \ + \ + DECLARE(EscapingRule, format_custom_escaping_rule, "Escaped", R"( +Field escaping rule (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_field_delimiter, "\t", R"( +Delimiter between fields (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_row_before_delimiter, "", R"( +Delimiter before field of the first column (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_row_after_delimiter, "\n", R"( +Delimiter after field of the last column (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_row_between_delimiter, "", R"( +Delimiter between rows (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_result_before_delimiter, "", R"( +Prefix before result set (for CustomSeparated format) +)", 0) \ + DECLARE(String, format_custom_result_after_delimiter, "", R"( +Suffix after result set (for CustomSeparated format) +)", 0) \ + \ + DECLARE(String, format_regexp, "", R"( +Regular expression (for Regexp format) +)", 0) \ + DECLARE(EscapingRule, format_regexp_escaping_rule, "Raw", R"( +Field escaping rule (for Regexp format) +)", 0) \ + DECLARE(Bool, format_regexp_skip_unmatched, false, R"( +Skip lines unmatched by regular expression (for Regexp format) +)", 0) \ + \ + DECLARE(Bool, output_format_enable_streaming, false, R"( +Enable streaming in 
output formats that support it. + +Disabled by default. +)", 0) \ + DECLARE(Bool, output_format_write_statistics, true, R"( +Write statistics about read rows, bytes, time elapsed in suitable output formats. + +Enabled by default +)", 0) \ + DECLARE(Bool, output_format_pretty_row_numbers, true, R"( +Add row numbers before each row for pretty output format +)", 0) \ + DECLARE(Bool, output_format_pretty_highlight_digit_groups, true, R"( +If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline. +)", 0) \ + DECLARE(UInt64, output_format_pretty_single_large_number_tip_threshold, 1'000'000, R"( +Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0) +)", 0) \ + DECLARE(Bool, insert_distributed_one_random_shard, false, R"( +Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. + +By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. + +Possible values: + +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. 
+)", 0) \ + \ + DECLARE(Bool, exact_rows_before_limit, false, R"( +When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely +)", 0) \ + DECLARE(Bool, rows_before_aggregation, false, R"( +When enabled, ClickHouse will provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation +)", 0) \ + DECLARE(UInt64, cross_to_inner_join_rewrite, 1, R"( +Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible +)", 0) \ + \ + DECLARE(Bool, output_format_arrow_low_cardinality_as_dictionary, false, R"( +Enable output LowCardinality type as Dictionary Arrow type +)", 0) \ + DECLARE(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, R"( +Use signed integers for dictionary indexes in Arrow format +)", 0) \ + DECLARE(Bool, output_format_arrow_use_64_bit_indexes_for_dictionary, false, R"( +Always use 64 bit integers for dictionary indexes in Arrow format +)", 0) \ + DECLARE(Bool, output_format_arrow_string_as_string, true, R"( +Use Arrow String type instead of Binary for String columns +)", 0) \ + DECLARE(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, R"( +Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns. +)", 0) \ + DECLARE(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", R"( +Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed) +)", 0) \ + \ + DECLARE(Bool, output_format_orc_string_as_string, true, R"( +Use ORC String type instead of Binary for String columns +)", 0) \ + DECLARE(ORCCompression, output_format_orc_compression_method, "zstd", R"( +Compression method for ORC output format. 
Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed) +)", 0) \ + DECLARE(UInt64, output_format_orc_row_index_stride, 10'000, R"( +Target row index stride in ORC output format +)", 0) \ + DECLARE(Double, output_format_orc_dictionary_key_size_threshold, 0.0, R"( +For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled +)", 0) \ + \ + DECLARE(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, R"( +How to map ClickHouse Enum and CapnProto Enum +)", 0) \ + \ + DECLARE(Bool, format_capn_proto_use_autogenerated_schema, true, R"( +Use autogenerated CapnProto schema when format_schema is not set +)", 0) \ + DECLARE(Bool, format_protobuf_use_autogenerated_schema, true, R"( +Use autogenerated Protobuf when format_schema is not set +)", 0) \ + DECLARE(String, output_format_schema, "", R"( +The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats. +)", 0) \ + \ + DECLARE(String, input_format_mysql_dump_table_name, "", R"( +Name of the table in MySQL dump from which to read data +)", 0) \ + DECLARE(Bool, input_format_mysql_dump_map_column_names, true, R"( +Match columns from table in MySQL dump and columns from ClickHouse table by names +)", 0) \ + \ + DECLARE(UInt64, output_format_sql_insert_max_batch_size, DEFAULT_BLOCK_SIZE, R"( +The maximum number of rows in one INSERT statement. 
+)", 0) \ + DECLARE(String, output_format_sql_insert_table_name, "table", R"( +The name of table in the output INSERT query +)", 0) \ + DECLARE(Bool, output_format_sql_insert_include_column_names, true, R"( +Include column names in INSERT query +)", 0) \ + DECLARE(Bool, output_format_sql_insert_use_replace, false, R"( +Use REPLACE statement instead of INSERT +)", 0) \ + DECLARE(Bool, output_format_sql_insert_quote_names, true, R"( +Quote column names with '`' characters +)", 0) \ + \ + DECLARE(Bool, output_format_values_escape_quote_with_quote, false, R"( +If true escape ' with '', otherwise quoted with \\' +)", 0) \ + \ + DECLARE(Bool, output_format_bson_string_as_string, false, R"( +Use BSON String type instead of Binary for String columns. +)", 0) \ + DECLARE(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, R"( +Skip fields with unsupported types while schema inference for format BSON. +)", 0) \ + \ + DECLARE(Bool, format_display_secrets_in_show_and_select, false, R"( +Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, +table functions, and dictionaries. + +User wishing to see secrets must also have +[`display_secrets_in_show_and_select` server setting](../server-configuration-parameters/settings#display_secrets_in_show_and_select) +turned on and a +[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#display-secrets) privilege. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. +)", IMPORTANT) \ + DECLARE(Bool, regexp_dict_allow_hyperscan, true, R"( +Allow regexp_tree dictionary using Hyperscan library. +)", 0) \ + DECLARE(Bool, regexp_dict_flag_case_insensitive, false, R"( +Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i). +)", 0) \ + DECLARE(Bool, regexp_dict_flag_dotall, false, R"( +Allow '.' to match newline characters for a regexp_tree dictionary. 
+)", 0) \ + \ + DECLARE(Bool, dictionary_use_async_executor, false, R"( +Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source. +)", 0) \ + DECLARE(Bool, precise_float_parsing, false, R"( +Prefer more precise (but slower) float parsing algorithm +)", 0) \ + DECLARE(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", R"( +Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'. +)", 0) \ + DECLARE(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, R"( +Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple +)", 0) \ + \ + DECLARE(IdentifierQuotingRule, show_create_query_identifier_quoting_rule, IdentifierQuotingRule::WhenNecessary, R"( +Set the quoting rule for identifiers in SHOW CREATE query +)", 0) \ + DECLARE(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"( +Set the quoting style for identifiers in SHOW CREATE query +)", 0) \ + +// End of FORMAT_FACTORY_SETTINGS + +#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \ + /** Obsolete format settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ + MAKE_OBSOLETE(M, Bool, input_format_arrow_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \ + MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \ + +#endif // __CLION_IDE__ + +#define LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ + FORMAT_FACTORY_SETTINGS(M, ALIAS) \ + OBSOLETE_FORMAT_SETTINGS(M, ALIAS) + diff --git a/src/Core/FormatFactorySettingsDeclaration.h b/src/Core/FormatFactorySettingsDeclaration.h deleted file mode 100644 index d725e441e46..00000000000 --- a/src/Core/FormatFactorySettingsDeclaration.h +++ /dev/null @@ -1,1259 +0,0 @@ -#pragma once - -#include - -/// This header exists so we can share it between Settings.cpp, FormatFactorySettings.cpp and other storage settings - -// clang-format off -#if defined(__CLION_IDE__) -/// CLion freezes for a minute every time it processes this -#define FORMAT_FACTORY_SETTINGS(M, ALIAS) -#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) -#else - -#define FORMAT_FACTORY_SETTINGS(M, ALIAS) \ - M(Char, format_csv_delimiter, ',', R"( -The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1. -)", 0) \ - M(Bool, format_csv_allow_single_quotes, false, R"( -If it is set to true, allow strings in single quotes. -)", 0) \ - M(Bool, format_csv_allow_double_quotes, true, R"( -If it is set to true, allow strings in double quotes. -)", 0) \ - M(Bool, output_format_csv_serialize_tuple_into_separate_columns, true, R"( -If it set to true, then Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost) -)", 0) \ - M(Bool, input_format_csv_deserialize_separate_columns_into_tuple, true, R"( -If it set to true, then separate columns written in CSV format can be deserialized to Tuple column. -)", 0) \ - M(Bool, output_format_csv_crlf_end_of_line, false, R"( -If it is set true, end of line in CSV format will be \\r\\n instead of \\n. 
-)", 0) \ - M(Bool, input_format_csv_allow_cr_end_of_line, false, R"( -If it is set true, \\r will be allowed at end of line not followed by \\n -)", 0) \ - M(Bool, input_format_csv_enum_as_number, false, R"( -Treat inserted enum values in CSV formats as enum indices -)", 0) \ - M(Bool, input_format_csv_arrays_as_nested_csv, false, R"( -When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: \"[\"\"Hello\"\", \"\"world\"\", \"\"42\"\"\"\" TV\"\"]\". Braces around array can be omitted. -)", 0) \ - M(Bool, input_format_skip_unknown_fields, true, R"( -Enables or disables skipping insertion of extra data. - -When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. - -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats) -- [BSONEachRow](../../interfaces/formats.md/#bsoneachrow) (and other JSON formats) -- [TSKV](../../interfaces/formats.md/#tskv) -- All formats with suffixes WithNames/WithNamesAndTypes -- [MySQLDump](../../interfaces/formats.md/#mysqldump) -- [Native](../../interfaces/formats.md/#native) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", 0) \ - M(Bool, input_format_with_names_use_header, true, R"( -Enables or disables checking the column order when inserting data. - -To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. 
- -Supported formats: - -- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) -- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", 0) \ - M(Bool, input_format_with_types_use_header, true, R"( -Controls whether format parser should check if data types from the input data match data types from the target table. 
- -Supported formats: - -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", 0) \ - M(Bool, input_format_import_nested_json, false, R"( -Enables or disables the insertion of JSON data with nested objects. - -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -See also: - -- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. -)", 0) \ - M(Bool, input_format_defaults_for_omitted_fields, true, R"( -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [Parquet](../../interfaces/formats.md/#parquet), [Arrow](../../interfaces/formats.md/#arrow), [Avro](../../interfaces/formats.md/#avro), [ORC](../../interfaces/formats.md/#orc), [Native](../../interfaces/formats.md/#native) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. - -:::note -When this option is enabled, extended table metadata are sent from server to client. 
It consumes additional computing resources on the server and can reduce performance. -::: - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", IMPORTANT) \ - M(Bool, input_format_csv_empty_as_default, true, R"( -Treat empty fields in CSV input as default values. -)", 0) \ - M(Bool, input_format_tsv_empty_as_default, false, R"( -Treat empty fields in TSV input as default values. -)", 0) \ - M(Bool, input_format_tsv_enum_as_number, false, R"( -Treat inserted enum values in TSV formats as enum indices. -)", 0) \ - M(Bool, input_format_null_as_default, true, R"( -Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). -If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. - -This setting is applicable for most input formats. - -For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Possible values: - -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — `NULL` fields are initialized with default column values. -)", 0) \ - M(Bool, input_format_force_null_for_omitted_fields, false, R"( -Force initialize omitted fields with null values -)", 0) \ - M(Bool, input_format_arrow_case_insensitive_column_matching, false, R"( -Ignore case when matching Arrow columns with CH columns. -)", 0) \ - M(Int64, input_format_orc_row_batch_size, 100'000, R"( -Batch size when reading ORC stripes. -)", 0) \ - M(Bool, input_format_orc_case_insensitive_column_matching, false, R"( -Ignore case when matching ORC columns with CH columns. 
-)", 0) \ - M(Bool, input_format_parquet_case_insensitive_column_matching, false, R"( -Ignore case when matching Parquet columns with CH columns. -)", 0) \ - M(Bool, input_format_parquet_preserve_order, false, R"( -Avoid reordering rows when reading from Parquet files. Usually makes it much slower. -)", 0) \ - M(Bool, input_format_parquet_filter_push_down, true, R"( -When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata. -)", 0) \ - M(Bool, input_format_parquet_bloom_filter_push_down, false, R"( -When reading Parquet files, skip whole row groups based on the WHERE expressions and bloom filter in the Parquet metadata. -)", 0) \ - M(Bool, input_format_parquet_use_native_reader, false, R"( -When reading Parquet files, to use native reader instead of arrow reader. -)", 0) \ - M(Bool, input_format_allow_seeks, true, R"( -Allow seeks while reading in ORC/Parquet/Arrow input formats. - -Enabled by default. -)", 0) \ - M(Bool, input_format_orc_allow_missing_columns, true, R"( -Allow missing columns while reading ORC input formats -)", 0) \ - M(Bool, input_format_orc_use_fast_decoder, true, R"( -Use a faster ORC decoder implementation. -)", 0) \ - M(Bool, input_format_orc_filter_push_down, true, R"( -When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata. -)", 0) \ - M(String, input_format_orc_reader_time_zone_name, "GMT", R"( -The time zone name for ORC row reader, the default ORC row reader's time zone is GMT. -)", 0) \ - M(Bool, input_format_orc_dictionary_as_low_cardinality, true, R"( -Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files. 
-)", 0) \ - M(Bool, input_format_parquet_allow_missing_columns, true, R"( -Allow missing columns while reading Parquet input formats -)", 0) \ - M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, R"( -Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format -)", 0) \ - M(Bool, input_format_parquet_enable_row_group_prefetch, true, R"( -Enable row group prefetching during parquet parsing. Currently, only single-threaded parsing can prefetch. -)", 0) \ - M(Bool, input_format_arrow_allow_missing_columns, true, R"( -Allow missing columns while reading Arrow input formats -)", 0) \ - M(Char, input_format_hive_text_fields_delimiter, '\x01', R"( -Delimiter between fields in Hive Text File -)", 0) \ - M(Char, input_format_hive_text_collection_items_delimiter, '\x02', R"( -Delimiter between collection(array or map) items in Hive Text File -)", 0) \ - M(Char, input_format_hive_text_map_keys_delimiter, '\x03', R"( -Delimiter between a pair of map key/values in Hive Text File -)", 0) \ - M(Bool, input_format_hive_text_allow_variable_number_of_columns, true, R"( -Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values -)", 0) \ - M(UInt64, input_format_msgpack_number_of_columns, 0, R"( -The number of columns in inserted MsgPack data. Used for automatic schema inference from data. -)", 0) \ - M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, R"( -The way how to output UUID in MsgPack format. -)", 0) \ - M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, R"( -The maximum rows of data to read for automatic schema inference. -)", 0) \ - M(UInt64, input_format_max_bytes_to_read_for_schema_inference, 32 * 1024 * 1024, R"( -The maximum amount of data in bytes to read for automatic schema inference. 
-)", 0) \ - M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, R"( -Use some tweaks and heuristics to infer schema in CSV format -)", 0) \ - M(Bool, input_format_csv_try_infer_numbers_from_strings, false, R"( -If enabled, during schema inference ClickHouse will try to infer numbers from string fields. -It can be useful if CSV data contains quoted UInt64 numbers. - -Disabled by default. -)", 0) \ - M(Bool, input_format_csv_try_infer_strings_from_quoted_tuples, true, R"( -Interpret quoted tuples in the input data as a value of type String. -)", 0) \ - M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, R"( -Use some tweaks and heuristics to infer schema in TSV format -)", 0) \ - M(Bool, input_format_csv_detect_header, true, R"( -Automatically detect header with names and types in CSV format -)", 0) \ - M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, R"( -Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings -)", 0) \ - M(Bool, input_format_csv_trim_whitespaces, true, R"( -Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings -)", 0) \ - M(Bool, input_format_csv_use_default_on_bad_values, false, R"( -Allow to set default value to column when CSV field deserialization failed on bad value -)", 0) \ - M(Bool, input_format_csv_allow_variable_number_of_columns, false, R"( -Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values -)", 0) \ - M(Bool, input_format_tsv_allow_variable_number_of_columns, false, R"( -Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values -)", 0) \ - M(Bool, input_format_custom_allow_variable_number_of_columns, false, R"( -Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values -)", 0) \ - M(Bool, 
input_format_json_compact_allow_variable_number_of_columns, false, R"( -Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values -)", 0) \ - M(Bool, input_format_tsv_detect_header, true, R"( -Automatically detect header with names and types in TSV format -)", 0) \ - M(Bool, input_format_custom_detect_header, true, R"( -Automatically detect header with names and types in CustomSeparated format -)", 0) \ - M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, R"( -Skip columns with unsupported types while schema inference for format Parquet -)", 0) \ - M(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, R"( -Max block size for parquet reader. -)", 0) \ - M(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, R"( -Average block bytes output by parquet reader -)", 0) \ - M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, R"( -Skip fields with unsupported types while schema inference for format Protobuf -)", 0) \ - M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, R"( -Skip columns with unsupported types while schema inference for format CapnProto -)", 0) \ - M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, R"( -Skip columns with unsupported types while schema inference for format ORC -)", 0) \ - M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, R"( -Skip columns with unsupported types while schema inference for format Arrow -)", 0) \ - M(String, column_names_for_schema_inference, "", R"( -The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' 
-)", 0) \ - M(String, schema_inference_hints, "", R"( -The list of column names and types to use as hints in schema inference for formats without schema. - -Example: - -Query: -```sql -desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; -``` - -Result: -```sql -x UInt8 -y Nullable(String) -z IPv4 -``` - -:::note -If the `schema_inference_hints` is not formatted properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored. -::: -)", 0) \ - M(SchemaInferenceMode, schema_inference_mode, "default", R"( -Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files -)", 0) \ - M(UInt64Auto, schema_inference_make_columns_nullable, 1, R"( -Controls making inferred types `Nullable` in schema inference. -If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. -)", 0) \ - M(Bool, input_format_json_read_bools_as_numbers, true, R"( -Allow parsing bools as numbers in JSON input formats. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_read_bools_as_strings, true, R"( -Allow parsing bools as strings in JSON input formats. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_try_infer_numbers_from_strings, false, R"( -If enabled, during schema inference ClickHouse will try to infer numbers from string fields. -It can be useful if JSON data contains quoted UInt64 numbers. - -Disabled by default. 
-)", 0) \ - M(Bool, input_format_json_validate_types_from_metadata, true, R"( -For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, -the types from metadata in input data will be compared with the types of the corresponding columns from the table. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_read_numbers_as_strings, true, R"( -Allow parsing numbers as strings in JSON input formats. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_read_objects_as_strings, true, R"( -Allow parsing JSON objects as strings in JSON input formats. - -Example: - -```sql -SET input_format_json_read_objects_as_strings = 1; -CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); -INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; -SELECT * FROM test; -``` - -Result: - -``` -┌─id─┬─obj──────────────────────┬───────date─┐ -│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ -└────┴──────────────────────────┴────────────┘ -``` - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_read_arrays_as_strings, true, R"( -Allow parsing JSON arrays as strings in JSON input formats. - -Example: - -```sql -SET input_format_json_read_arrays_as_strings = 1; -SELECT arr, toTypeName(arr), JSONExtractArrayRaw(arr)[3] from format(JSONEachRow, 'arr String', '{"arr" : [1, "Hello", [1,2,3]]}'); -``` - -Result: -``` -┌─arr───────────────────┬─toTypeName(arr)─┬─arrayElement(JSONExtractArrayRaw(arr), 3)─┐ -│ [1, "Hello", [1,2,3]] │ String │ [1,2,3] │ -└───────────────────────┴─────────────────┴───────────────────────────────────────────┘ -``` - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_try_infer_named_tuples_from_objects, true, R"( -If enabled, during schema inference ClickHouse will try to infer named Tuple from JSON objects. -The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data. 
- -Example: - -```sql -SET input_format_json_try_infer_named_tuples_from_objects = 1; -DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}') -``` - -Result: - -``` -┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ obj │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │ │ │ │ │ │ -└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ -``` - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects, false, R"( -Use String type instead of an exception in case of ambiguous paths in JSON objects during named tuples inference -)", 0) \ - M(Bool, input_format_json_infer_incomplete_types_as_strings, true, R"( -Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference. -In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference -by using String type for keys with unknown types. 
- -Example: - -```sql -SET input_format_json_infer_incomplete_types_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 1; -DESCRIBE format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); -SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); -``` - -Result: -``` -┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ obj │ Tuple(a Array(Nullable(Int64)), b Nullable(String), c Nullable(String), d Nullable(String), e Array(Nullable(String))) │ │ │ │ │ │ -└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ - -┌─obj────────────────────────────┐ -│ ([1,2,3],'hello',NULL,'{}',[]) │ -└────────────────────────────────┘ -``` - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_named_tuples_as_objects, true, R"( -Parse named tuple columns as JSON objects. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, R"( -Ignore unknown keys in json object for named tuples. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, R"( -Insert default values for missing elements in JSON object while parsing named tuple. -This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. - -Enabled by default. -)", 0) \ - M(Bool, input_format_json_throw_on_bad_escape_sequence, true, R"( -Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data. - -Enabled by default. 
-)", 0) \ - M(Bool, input_format_json_ignore_unnecessary_fields, true, R"( -Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields -)", 0) \ - M(Bool, input_format_try_infer_variants, false, R"( -If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", 0) \ - M(Bool, type_json_skip_duplicated_paths, false, R"( -When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception -)", 0) \ - M(UInt64, input_format_json_max_depth, 1000, R"( -Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely. -)", 0) \ - M(Bool, input_format_json_empty_as_default, false, R"( -Treat empty fields in JSON input as default values. -)", 0) \ - M(Bool, input_format_try_infer_integers, true, R"( -If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. - -Enabled by default. -)", 0) \ - M(Bool, input_format_try_infer_dates, true, R"( -If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. - -Enabled by default. -)", 0) \ - M(Bool, input_format_try_infer_datetimes, true, R"( -If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. 
If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. - -Enabled by default. -)", 0) \ - M(Bool, input_format_try_infer_datetimes_only_datetime64, false, R"( -When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types -)", 0) \ - M(Bool, input_format_try_infer_exponent_floats, false, R"( -Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred) -)", 0) \ - M(Bool, output_format_markdown_escape_special_characters, false, R"( -Escape special characters in Markdown -)", 0) \ - M(Bool, input_format_protobuf_flatten_google_wrappers, false, R"( -Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls -)", 0) \ - M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, R"( -When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. 
If turned off, default and null values are not serialized -)", 0) \ - M(UInt64, input_format_csv_skip_first_lines, 0, R"( -Skip specified number of lines at the beginning of data in CSV format -)", 0) \ - M(UInt64, input_format_tsv_skip_first_lines, 0, R"( -Skip specified number of lines at the beginning of data in TSV format -)", 0) \ - M(Bool, input_format_csv_skip_trailing_empty_lines, false, R"( -Skip trailing empty lines in CSV format -)", 0) \ - M(Bool, input_format_tsv_skip_trailing_empty_lines, false, R"( -Skip trailing empty lines in TSV format -)", 0) \ - M(Bool, input_format_custom_skip_trailing_empty_lines, false, R"( -Skip trailing empty lines in CustomSeparated format -)", 0) \ - M(Bool, input_format_tsv_crlf_end_of_line, false, R"( -If it is set true, file function will read TSV format with \\r\\n instead of \\n. -)", 0) \ - \ - M(Bool, input_format_native_allow_types_conversion, true, R"( -Allow data types conversion in Native input format -)", 0) \ - M(Bool, input_format_native_decode_types_in_binary_format, false, R"( -Read data types in binary format instead of type names in Native input format -)", 0) \ - M(Bool, output_format_native_encode_types_in_binary_format, false, R"( -Write data types in binary format instead of type names in Native output format -)", 0) \ - M(Bool, output_format_native_write_json_as_string, false, R"( -Write data of [JSON](../../sql-reference/data-types/newjson.md) column as [String](../../sql-reference/data-types/string.md) column containing JSON strings instead of default native JSON serialization. -)", 0) \ - \ - M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, R"( -Allows choosing a parser of the text representation of date and time. - -The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). - -Possible values: - -- `'best_effort'` — Enables extended parsing. 
- - ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. - -- `'basic'` — Use basic parser. - - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. - -Cloud default value: `'best_effort'`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) -)", 0) \ - M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, R"( -Allows choosing different output formats of the text representation of date and time. - -Possible values: - -- `simple` - Simple output format. - - ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. - -- `iso` - ISO output format. - - ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). - -- `unix_timestamp` - Unix timestamp output format. - - ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) -)", 0) \ - M(IntervalOutputFormat, interval_output_format, FormatSettings::IntervalOutputFormat::Numeric, R"( -Allows choosing different output formats of the text representation of interval types. - -Possible values: - -- `kusto` - KQL-style output format. 
- - ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (ie. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account. - -- `numeric` - Numeric output format. - - ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`. - -See also: - -- [Interval](../../sql-reference/data-types/special-data-types/interval.md) -)", 0) \ - \ - M(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"( -Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6], -corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \ - M(Bool, input_format_ipv4_default_on_conversion_error, false, R"( -Deserialization of IPv4 will use default values instead of throwing exception on conversion error. - -Disabled by default. -)", 0) \ - M(Bool, input_format_ipv6_default_on_conversion_error, false, R"( -Deserialization of IPV6 will use default values instead of throwing exception on conversion error. - -Disabled by default. -)", 0) \ - M(String, bool_true_representation, "true", R"( -Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. -)", 0) \ - M(String, bool_false_representation, "false", R"( -Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. -)", 0) \ - \ - M(Bool, input_format_values_interpret_expressions, true, R"( -For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. 
-)", 0) \ - M(Bool, input_format_values_deduce_templates_of_expressions, true, R"( -For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. -)", 0) \ - M(Bool, input_format_values_accurate_types_of_literals, true, R"( -For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. -)", 0) \ - M(Bool, input_format_avro_allow_missing_fields, false, R"( -For Avro/AvroConfluent format: when field is not found in schema use default value instead of error -)", 0) \ - /** This setting is obsolete and do nothing, left for compatibility reasons. */ \ - M(Bool, input_format_avro_null_as_default, false, R"( -For Avro/AvroConfluent format: insert default in case of null and non Nullable column -)", 0) \ - M(UInt64, format_binary_max_string_size, 1_GiB, R"( -The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit -)", 0) \ - M(UInt64, format_binary_max_array_size, 1_GiB, R"( -The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit -)", 0) \ - M(Bool, input_format_binary_decode_types_in_binary_format, false, R"( -Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format -)", 0) \ - M(Bool, output_format_binary_encode_types_in_binary_format, false, R"( -Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format -)", 0) \ - M(URI, format_avro_schema_registry_url, "", R"( -For AvroConfluent format: Confluent Schema Registry URL. 
-)", 0) \ - M(Bool, input_format_binary_read_json_as_string, false, R"( -Read values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary input format. -)", 0) \ - M(Bool, output_format_binary_write_json_as_string, false, R"( -Write values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary output format. -)", 0) \ - \ - M(Bool, output_format_json_quote_64bit_integers, true, R"( -Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. -Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. - -Possible values: - -- 0 — Integers are output without quotes. -- 1 — Integers are enclosed in quotes. -)", 0) \ - M(Bool, output_format_json_quote_denormals, false, R"str( -Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. 
- -**Example** - -Consider the following table `account_orders`: - -```text -┌─id─┬─name───┬─duration─┬─period─┬─area─┐ -│ 1 │ Andrew │ 20 │ 0 │ 400 │ -│ 2 │ John │ 40 │ 0 │ 0 │ -│ 3 │ Bob │ 15 │ 0 │ -100 │ -└────┴────────┴──────────┴────────┴──────┘ -``` - -When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: - -```sql -SELECT area/period FROM account_orders FORMAT JSON; -``` - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.003648093, - "rows_read": 3, - "bytes_read": 24 - } -} -``` - -When `output_format_json_quote_denormals = 1`, the query returns: - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": "inf" - }, - { - "divide(area, period)": "-nan" - }, - { - "divide(area, period)": "-inf" - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.000070241, - "rows_read": 3, - "bytes_read": 24 - } -} -``` -)str", 0) \ - M(Bool, output_format_json_quote_decimals, false, R"( -Controls quoting of decimals in JSON output formats. - -Disabled by default. -)", 0) \ - M(Bool, output_format_json_quote_64bit_floats, false, R"( -Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. - -Disabled by default. -)", 0) \ - \ - M(Bool, output_format_json_escape_forward_slashes, true, R"( -Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. - -Enabled by default. -)", 0) \ - M(Bool, output_format_json_named_tuples_as_objects, true, R"( -Serialize named tuple columns as JSON objects. - -Enabled by default. 
-)", 0) \ - M(Bool, output_format_json_skip_null_value_in_named_tuples, false, R"( -Skip key value pairs with null value when serialize named tuple columns as JSON objects. It is only valid when output_format_json_named_tuples_as_objects is true. -)", 0) \ - M(Bool, output_format_json_array_of_rows, false, R"( -Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. - -Possible values: - -- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. -- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. - -**Example of a query with the enabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 1; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -[ -{"number":"0"}, -{"number":"1"}, -{"number":"2"} -] -``` - -**Example of a query with the disabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 0; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -{"number":"0"} -{"number":"1"} -{"number":"2"} -``` -)", 0) \ - M(Bool, output_format_json_validate_utf8, false, R"( -Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. - -Disabled by default. -)", 0) \ - \ - M(String, format_json_object_each_row_column_for_object_name, "", R"( -The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. -Column type should be String. If value is empty, default names `row_{i}`will be used for object names. - -### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns} - -Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats. 
-Ignore extra columns in rows with more columns than expected and treat missing columns as default values. - -Disabled by default. - -### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters} - -When enabled, escape special characters in Markdown. - -[Common Mark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped by \: - -``` -! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ -``` - -Possible values: - -+ 0 — Disable. -+ 1 — Enable. - -### input_format_json_empty_as_default {#input_format_json_empty_as_default} - -When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Possible values: - -+ 0 — Disable. -+ 1 — Enable. -)", 0) \ - \ - M(UInt64, output_format_pretty_max_rows, 10000, R"( -Rows limit for Pretty formats. -)", 0) \ - M(UInt64, output_format_pretty_max_column_pad_width, 250, R"( -Maximum width to pad all values in a column in Pretty formats. -)", 0) \ - M(UInt64, output_format_pretty_max_value_width, 10000, R"( -Maximum width of value to display in Pretty formats. If greater - it will be cut. -)", 0) \ - M(UInt64, output_format_pretty_max_value_width_apply_for_single_value, false, R"( -Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query. -)", 0) \ - M(UInt64Auto, output_format_pretty_color, "auto", R"( -Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal. -)", 0) \ - M(String, output_format_pretty_grid_charset, "UTF-8", R"( -Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one). 
-)", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names, true, R"( -Display column names in the footer if there are many table rows. - -Possible values: - -- 0 — No column names are displayed in the footer. -- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default). - -**Example** - -Query: - -```sql -SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000); -``` - -Result: - -```response - ┌─number─┬─toTypeName(number)─┐ - 1. │ 0 │ UInt64 │ - 2. │ 1 │ UInt64 │ - 3. │ 2 │ UInt64 │ - ... - 999. │ 998 │ UInt64 │ -1000. │ 999 │ UInt64 │ - └─number─┴─toTypeName(number)─┘ -``` -)", 0) \ - M(UInt64, output_format_pretty_display_footer_column_names_min_rows, 50, R"( -Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled. -)", 0) \ - M(UInt64, output_format_parquet_row_group_size, 1000000, R"( -Target row group size in rows. -)", 0) \ - M(UInt64, output_format_parquet_row_group_size_bytes, 512 * 1024 * 1024, R"( -Target row group size in bytes, before compression. -)", 0) \ - M(Bool, output_format_parquet_string_as_string, true, R"( -Use Parquet String type instead of Binary for String columns. -)", 0) \ - M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, R"( -Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns. -)", 0) \ - M(ParquetVersion, output_format_parquet_version, "2.latest", R"( -Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default) -)", 0) \ - M(ParquetCompression, output_format_parquet_compression_method, "zstd", R"( -Compression method for Parquet output format. 
Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed) -)", 0) \ - M(Bool, output_format_parquet_compliant_nested_types, true, R"( -In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow. -)", 0) \ - M(Bool, output_format_parquet_use_custom_encoder, true, R"( -Use a faster Parquet encoder implementation. -)", 0) \ - M(Bool, output_format_parquet_parallel_encoding, true, R"( -Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder. -)", 0) \ - M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, R"( -Target page size in bytes, before compression. -)", 0) \ - M(UInt64, output_format_parquet_batch_size, 1024, R"( -Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs. -)", 0) \ - M(Bool, output_format_parquet_write_page_index, true, R"( -Add a possibility to write page index into parquet files. -)", 0) \ - M(String, output_format_avro_codec, "", R"( -Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'. -)", 0) \ - M(UInt64, output_format_avro_sync_interval, 16 * 1024, R"( -Sync interval in bytes. -)", 0) \ - M(String, output_format_avro_string_column_pattern, "", R"( -For Avro format: regexp of String columns to select as AVRO string. -)", 0) \ - M(UInt64, output_format_avro_rows_in_file, 1, R"( -Max rows in a file (if permitted by storage) -)", 0) \ - M(Bool, output_format_tsv_crlf_end_of_line, false, R"( -If it is set true, end of line in TSV format will be \\r\\n instead of \\n. 
-)", 0) \ - M(String, format_csv_null_representation, "\\N", R"( -Custom NULL representation in CSV format -)", 0) \ - M(String, format_tsv_null_representation, "\\N", R"( -Custom NULL representation in TSV format -)", 0) \ - M(Bool, output_format_decimal_trailing_zeros, false, R"( -Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. - -Disabled by default. -)", 0) \ - \ - M(UInt64, input_format_allow_errors_num, 0, R"( -Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). - -The default value is 0. - -Always pair it with `input_format_allow_errors_ratio`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -)", 0) \ - M(Float, input_format_allow_errors_ratio, 0, R"( -Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). -The percentage of errors is set as a floating-point number between 0 and 1. - -The default value is 0. - -Always pair it with `input_format_allow_errors_num`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. -)", 0) \ - M(String, input_format_record_errors_file_path, "", R"( -Path of the file used to record errors while reading text formats (CSV, TSV). -)", 0) \ - M(String, errors_output_format, "CSV", R"( -Method to write Errors to text output. 
-)", 0) \ - \ - M(String, format_schema, "", R"( -This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. -)", 0) \ - M(String, format_template_resultset, "", R"( -Path to file which contains format string for result set (for Template format) -)", 0) \ - M(String, format_template_row, "", R"( -Path to file which contains format string for rows (for Template format) -)", 0) \ - M(String, format_template_row_format, "", R"( -Format string for rows (for Template format) -)", 0) \ - M(String, format_template_resultset_format, "", R"( -Format string for result set (for Template format) -)", 0) \ - M(String, format_template_rows_between_delimiter, "\n", R"( -Delimiter between rows (for Template format) -)", 0) \ - \ - M(EscapingRule, format_custom_escaping_rule, "Escaped", R"( -Field escaping rule (for CustomSeparated format) -)", 0) \ - M(String, format_custom_field_delimiter, "\t", R"( -Delimiter between fields (for CustomSeparated format) -)", 0) \ - M(String, format_custom_row_before_delimiter, "", R"( -Delimiter before field of the first column (for CustomSeparated format) -)", 0) \ - M(String, format_custom_row_after_delimiter, "\n", R"( -Delimiter after field of the last column (for CustomSeparated format) -)", 0) \ - M(String, format_custom_row_between_delimiter, "", R"( -Delimiter between rows (for CustomSeparated format) -)", 0) \ - M(String, format_custom_result_before_delimiter, "", R"( -Prefix before result set (for CustomSeparated format) -)", 0) \ - M(String, format_custom_result_after_delimiter, "", R"( -Suffix after result set (for CustomSeparated format) -)", 0) \ - \ - M(String, format_regexp, "", R"( -Regular expression (for Regexp format) -)", 0) \ - M(EscapingRule, format_regexp_escaping_rule, "Raw", R"( -Field escaping rule (for Regexp format) -)", 0) \ - M(Bool, 
format_regexp_skip_unmatched, false, R"( -Skip lines unmatched by regular expression (for Regexp format) -)", 0) \ - \ - M(Bool, output_format_enable_streaming, false, R"( -Enable streaming in output formats that support it. - -Disabled by default. -)", 0) \ - M(Bool, output_format_write_statistics, true, R"( -Write statistics about read rows, bytes, time elapsed in suitable output formats. - -Enabled by default -)", 0) \ - M(Bool, output_format_pretty_row_numbers, true, R"( -Add row numbers before each row for pretty output format -)", 0) \ - M(Bool, output_format_pretty_highlight_digit_groups, true, R"( -If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline. -)", 0) \ - M(UInt64, output_format_pretty_single_large_number_tip_threshold, 1'000'000, R"( -Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0) -)", 0) \ - M(Bool, insert_distributed_one_random_shard, false, R"( -Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. - -By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. - -Possible values: - -- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. -- 1 — Insertion is done randomly among all available shards when no distributed key is given. 
-)", 0) \ - \ - M(Bool, exact_rows_before_limit, false, R"( -When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely -)", 0) \ - M(Bool, rows_before_aggregation, false, R"( -When enabled, ClickHouse will provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation -)", 0) \ - M(UInt64, cross_to_inner_join_rewrite, 1, R"( -Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible -)", 0) \ - \ - M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, R"( -Enable output LowCardinality type as Dictionary Arrow type -)", 0) \ - M(Bool, output_format_arrow_use_signed_indexes_for_dictionary, true, R"( -Use signed integers for dictionary indexes in Arrow format -)", 0) \ - M(Bool, output_format_arrow_use_64_bit_indexes_for_dictionary, false, R"( -Always use 64 bit integers for dictionary indexes in Arrow format -)", 0) \ - M(Bool, output_format_arrow_string_as_string, true, R"( -Use Arrow String type instead of Binary for String columns -)", 0) \ - M(Bool, output_format_arrow_fixed_string_as_fixed_byte_array, true, R"( -Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns. -)", 0) \ - M(ArrowCompression, output_format_arrow_compression_method, "lz4_frame", R"( -Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed) -)", 0) \ - \ - M(Bool, output_format_orc_string_as_string, true, R"( -Use ORC String type instead of Binary for String columns -)", 0) \ - M(ORCCompression, output_format_orc_compression_method, "zstd", R"( -Compression method for ORC output format. 
Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed) -)", 0) \ - M(UInt64, output_format_orc_row_index_stride, 10'000, R"( -Target row index stride in ORC output format -)", 0) \ - M(Double, output_format_orc_dictionary_key_size_threshold, 0.0, R"( -For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled -)", 0) \ - \ - M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, R"( -How to map ClickHouse Enum and CapnProto Enum -)", 0) \ - \ - M(Bool, format_capn_proto_use_autogenerated_schema, true, R"( -Use autogenerated CapnProto schema when format_schema is not set -)", 0) \ - M(Bool, format_protobuf_use_autogenerated_schema, true, R"( -Use autogenerated Protobuf when format_schema is not set -)", 0) \ - M(String, output_format_schema, "", R"( -The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats. -)", 0) \ - \ - M(String, input_format_mysql_dump_table_name, "", R"( -Name of the table in MySQL dump from which to read data -)", 0) \ - M(Bool, input_format_mysql_dump_map_column_names, true, R"( -Match columns from table in MySQL dump and columns from ClickHouse table by names -)", 0) \ - \ - M(UInt64, output_format_sql_insert_max_batch_size, DEFAULT_BLOCK_SIZE, R"( -The maximum number of rows in one INSERT statement. 
-)", 0) \ - M(String, output_format_sql_insert_table_name, "table", R"( -The name of table in the output INSERT query -)", 0) \ - M(Bool, output_format_sql_insert_include_column_names, true, R"( -Include column names in INSERT query -)", 0) \ - M(Bool, output_format_sql_insert_use_replace, false, R"( -Use REPLACE statement instead of INSERT -)", 0) \ - M(Bool, output_format_sql_insert_quote_names, true, R"( -Quote column names with '`' characters -)", 0) \ - \ - M(Bool, output_format_values_escape_quote_with_quote, false, R"( -If true escape ' with '', otherwise quoted with \\' -)", 0) \ - \ - M(Bool, output_format_bson_string_as_string, false, R"( -Use BSON String type instead of Binary for String columns. -)", 0) \ - M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, R"( -Skip fields with unsupported types while schema inference for format BSON. -)", 0) \ - \ - M(Bool, format_display_secrets_in_show_and_select, false, R"( -Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, -table functions, and dictionaries. - -User wishing to see secrets must also have -[`display_secrets_in_show_and_select` server setting](../server-configuration-parameters/settings#display_secrets_in_show_and_select) -turned on and a -[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#display-secrets) privilege. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. -)", IMPORTANT) \ - M(Bool, regexp_dict_allow_hyperscan, true, R"( -Allow regexp_tree dictionary using Hyperscan library. -)", 0) \ - M(Bool, regexp_dict_flag_case_insensitive, false, R"( -Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i). -)", 0) \ - M(Bool, regexp_dict_flag_dotall, false, R"( -Allow '.' to match newline characters for a regexp_tree dictionary. 
-)", 0) \ - \ - M(Bool, dictionary_use_async_executor, false, R"( -Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source. -)", 0) \ - M(Bool, precise_float_parsing, false, R"( -Prefer more precise (but slower) float parsing algorithm -)", 0) \ - M(DateTimeOverflowBehavior, date_time_overflow_behavior, "ignore", R"( -Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'. -)", 0) \ - M(Bool, validate_experimental_and_suspicious_types_inside_nested_types, true, R"( -Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple -)", 0) \ - \ - M(IdentifierQuotingRule, show_create_query_identifier_quoting_rule, IdentifierQuotingRule::WhenNecessary, R"( -Set the quoting rule for identifiers in SHOW CREATE query -)", 0) \ - M(IdentifierQuotingStyle, show_create_query_identifier_quoting_style, IdentifierQuotingStyle::Backticks, R"( -Set the quoting style for identifiers in SHOW CREATE query -)", 0) \ - -// End of FORMAT_FACTORY_SETTINGS - -#define OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \ - /** Obsolete format settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. 
*/ \ - MAKE_OBSOLETE(M, Bool, input_format_arrow_import_nested, false) \ - MAKE_OBSOLETE(M, Bool, input_format_parquet_import_nested, false) \ - MAKE_OBSOLETE(M, Bool, input_format_orc_import_nested, false) \ - -#endif // __CLION_IDE__ - -#define LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ - FORMAT_FACTORY_SETTINGS(M, ALIAS) \ - OBSOLETE_FORMAT_SETTINGS(M, ALIAS) diff --git a/src/Core/MergeSelectorAlgorithm.h b/src/Core/MergeSelectorAlgorithm.h index 8af23d0f885..2262663602c 100644 --- a/src/Core/MergeSelectorAlgorithm.h +++ b/src/Core/MergeSelectorAlgorithm.h @@ -8,6 +8,7 @@ enum class MergeSelectorAlgorithm : uint8_t { SIMPLE, STOCHASTIC_SIMPLE, + TRIVIAL, COMPLEX, }; diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 42991ebe0ba..7c2cb49a2ba 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -26,170 +26,170 @@ extern const Metric BackgroundMessageBrokerSchedulePoolSize; namespace DB { -#define LIST_OF_SERVER_SETTINGS(M, ALIAS) \ - M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ - M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ - M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \ - M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \ - M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \ - M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \ - M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ - M(UInt64, 
max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ - M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ - M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \ - M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ - M(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \ - M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ - M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \ - M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \ - M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ - M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ - M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \ - M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \ - M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. 
Zero means unlimited.", 0) \ - M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \ - M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \ - M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \ - M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \ - M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \ - M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \ - M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ - M(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \ - M(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \ - M(Int32, max_connections, 1024, "Max server connections.", 0) \ - M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ - M(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ - M(String, default_database, "default", "Default database name.", 0) \ - M(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \ - M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \ - M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ - M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. 
This limit is checked at serialization and help to avoid large state size.", 0) \ - M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \ - M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ - M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \ - M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \ - M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \ - M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ - M(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \ - M(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \ - M(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \ - M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ - M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ - M(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. 
This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \ - M(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \ +#define LIST_OF_SERVER_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \ + DECLARE(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \ + DECLARE(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \ + DECLARE(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \ + DECLARE(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \ + DECLARE(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \ + DECLARE(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \ + DECLARE(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ + DECLARE(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ + DECLARE(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \ + DECLARE(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \ + DECLARE(UInt64, max_unexpected_parts_loading_thread_pool_size, 8, "The number of threads to load inactive set of data parts (Unexpected ones) at startup.", 0) \ + 
DECLARE(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \ + DECLARE(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. 
Zero means unlimited.", 0) \ + DECLARE(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \ + DECLARE(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \ + DECLARE(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \ + DECLARE(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \ + DECLARE(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \ + DECLARE(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \ + DECLARE(Bool, shutdown_wait_backups_and_restores, true, "If set to true ClickHouse will wait for running backups and restores to finish before shutdown.", 0) \ + DECLARE(Double, cannot_allocate_thread_fault_injection_probability, 0, "For testing purposes.", 0) \ + DECLARE(Int32, max_connections, 1024, "Max server connections.", 0) \ + DECLARE(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \ + DECLARE(UInt32, asynchronous_heavy_metrics_update_period_s, 120, "Period in seconds for updating heavy asynchronous metrics.", 0) \ + DECLARE(String, default_database, "default", "Default database name.", 0) \ + DECLARE(String, tmp_policy, "", "Policy for storage with temporary data.", 0) \ + DECLARE(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \ + DECLARE(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ + DECLARE(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. 
This limit is checked at serialization and help to avoid large state size.", 0) \ + DECLARE(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \ + DECLARE(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ + DECLARE(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \ + DECLARE(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \ + DECLARE(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to RAM ratio. Allows to lower memory limit on low-memory systems.", 0) \ + DECLARE(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ + DECLARE(UInt64, cgroups_memory_usage_observer_wait_time, 15, "Polling interval in seconds to read the current memory usage from cgroups. Zero means disabled.", 0) \ + DECLARE(Double, cgroup_memory_watcher_hard_limit_ratio, 0.95, "Hard memory limit ratio for cgroup memory usage observer", 0) \ + DECLARE(Double, cgroup_memory_watcher_soft_limit_ratio, 0.9, "Soft memory limit ratio limit for cgroup memory usage observer", 0) \ + DECLARE(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ + DECLARE(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ + DECLARE(Bool, ignore_empty_sql_security_in_create_view_query, true, "If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. 
This setting is only necessary for the migration period and will become obsolete in 24.4", 0) \ + DECLARE(UInt64, max_build_vector_similarity_index_thread_pool_size, 16, "The maximum number of threads to use to build vector similarity indexes. 0 means all cores.", 0) \ \ /* Database Catalog */ \ - M(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \ - M(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \ - M(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \ - M(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. 
Zero means 'never'.", 0) \ - M(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \ - M(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \ + DECLARE(UInt64, database_atomic_delay_before_drop_table_sec, 8 * 60, "The delay during which a dropped table can be restored using the UNDROP statement. If DROP TABLE ran with a SYNC modifier, the setting is ignored.", 0) \ + DECLARE(UInt64, database_catalog_unused_dir_hide_timeout_sec, 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and this directory was not modified for last database_catalog_unused_dir_hide_timeout_sec seconds, the task will 'hide' this directory by removing all access rights. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'immediately'.", 0) \ + DECLARE(UInt64, database_catalog_unused_dir_rm_timeout_sec, 30 * 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. If some subdirectory is not used by clickhouse-server and it was previously 'hidden' (see database_catalog_unused_dir_hide_timeout_sec) and this directory was not modified for last database_catalog_unused_dir_rm_timeout_sec seconds, the task will remove this directory. It also works for directories that clickhouse-server does not expect to see inside store/. Zero means 'never'.", 0) \ + DECLARE(UInt64, database_catalog_unused_dir_cleanup_period_sec, 24 * 60 * 60, "Parameter of a task that cleans up garbage from store/ directory. Sets scheduling period of the task. 
Zero means 'never'.", 0) \ + DECLARE(UInt64, database_catalog_drop_error_cooldown_sec, 5, "In case if drop table failed, ClickHouse will wait for this timeout before retrying the operation.", 0) \ + DECLARE(UInt64, database_catalog_drop_table_concurrency, 16, "The size of the threadpool used for dropping tables.", 0) \ \ \ - M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \ - M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ - M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ - M(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \ \ - M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ - M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ - M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. 
Zero means disabled.", 0) \ - M(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \ - M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \ - M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \ - M(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \ - M(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \ - M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \ - M(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \ - M(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \ - M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \ - M(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ - M(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ - M(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. Not important.", 0) \ - M(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. 
If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ - M(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ - M(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ - M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ - M(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \ - M(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \ + DECLARE(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ + DECLARE(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ + DECLARE(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. 
Zero means disabled.", 0) \ + DECLARE(Double, uncompressed_cache_size_ratio, DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the uncompressed cache relative to the cache's total size.", 0) \ + DECLARE(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \ + DECLARE(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \ + DECLARE(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \ + DECLARE(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \ + DECLARE(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \ + DECLARE(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \ + DECLARE(String, index_mark_cache_policy, DEFAULT_INDEX_MARK_CACHE_POLICY, "Secondary index mark cache policy name.", 0) \ + DECLARE(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for secondary index marks. Zero means disabled.", 0) \ + DECLARE(Double, index_mark_cache_size_ratio, DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index mark cache relative to the cache's total size.", 0) \ + DECLARE(UInt64, page_cache_chunk_size, 2 << 20, "Bytes per chunk in userspace page cache. Rounded up to a multiple of page size (typically 4 KiB) or huge page size (typically 2 MiB, only if page_cache_use_thp is enabled).", 0) \ + DECLARE(UInt64, page_cache_mmap_size, 1 << 30, "Bytes per memory mapping in userspace page cache. 
Not important.", 0) \ + DECLARE(UInt64, page_cache_size, 0, "Amount of virtual memory to map for userspace page cache. If page_cache_use_madv_free is enabled, it's recommended to set this higher than the machine's RAM size. Use 0 to disable userspace page cache.", 0) \ + DECLARE(Bool, page_cache_use_madv_free, DBMS_DEFAULT_PAGE_CACHE_USE_MADV_FREE, "If true, the userspace page cache will allow the OS to automatically reclaim memory from the cache on memory pressure (using MADV_FREE).", 0) \ + DECLARE(Bool, page_cache_use_transparent_huge_pages, true, "Userspace will attempt to use transparent huge pages on Linux. This is best-effort.", 0) \ + DECLARE(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \ + DECLARE(UInt64, compiled_expression_cache_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE, "Byte size of compiled expressions cache.", 0) \ + DECLARE(UInt64, compiled_expression_cache_elements_size, DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES, "Maximum entries in compiled expressions cache.", 0) \ \ - M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ - M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ - M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ - M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ - M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \ - M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \ + DECLARE(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ + DECLARE(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ + DECLARE(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ + DECLARE(UInt32, dns_max_consecutive_failures, 10, 
"Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + DECLARE(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \ + DECLARE(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \ \ - M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ - M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ - M(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ - M(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 
0 means no limitation.", 0) \ - M(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \ - M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ - M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ + DECLARE(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ + DECLARE(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ + DECLARE(UInt64, max_table_num_to_warn, 5000lu, "If the number of tables is greater than this value, the server will create a warning that will displayed to user.", 0) \ + DECLARE(UInt64, max_view_num_to_warn, 10000lu, "If the number of views is greater than this value, the server will create a warning that will displayed to user.", 0) \ + DECLARE(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \ + DECLARE(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \ + DECLARE(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \ + DECLARE(UInt64, max_table_num_to_throw, 0lu, "If number of tables is greater than this value, server will throw an exception. 0 means no limitation. View, remote tables, dictionary, system tables are not counted. 
Only count table in Atomic/Ordinary/Replicated/Lazy database engine.", 0) \ + DECLARE(UInt64, max_database_num_to_throw, 0lu, "If number of databases is greater than this value, server will throw an exception. 0 means no limitation.", 0) \ + DECLARE(UInt64, max_authentication_methods_per_user, 100, "The maximum number of authentication methods a user can be created with or altered. Changing this setting does not affect existing users. Zero means unlimited", 0) \ + DECLARE(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ + DECLARE(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ - M(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \ - M(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \ - M(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. 
", 0) \ - M(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \ - M(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \ - M(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \ - M(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \ - M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ - M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ - M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \ - M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \ - M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. 
Queries to not yet loaded entity will be blocked until load is finished.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ - M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ - M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \ - M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ - M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ - M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ - M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. 
You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ - M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \ - M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ - M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \ - M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \ - M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \ - M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ - M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ - M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ - M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. 
The limit applies to the disks connections.", 0) \ - M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \ - M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ - M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \ - M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \ - M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \ - M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ - M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ - M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ - M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. 
Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ - M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ - M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ - M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ - M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ - M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ - M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \ - M(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ - M(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ - M(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \ - M(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. 
This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \ - M(UInt64, parts_kill_delay_period, 30, "Period to completely remove parts for SharedMergeTree. Only available in ClickHouse Cloud", 0) \ - M(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \ - M(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \ - M(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \ - M(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \ + DECLARE(UInt64, background_pool_size, 16, "The maximum number of threads what will be used for merging or mutating data parts for *MergeTree-engine tables in a background.", 0) \ + DECLARE(Float, background_merges_mutations_concurrency_ratio, 2, "The number of part mutation tasks that can be executed concurrently by each thread in background pool.", 0) \ + DECLARE(String, background_merges_mutations_scheduling_policy, "round_robin", "The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. 
", 0) \ + DECLARE(UInt64, background_move_pool_size, 8, "The maximum number of threads that will be used for moving data parts to another disk or volume for *MergeTree-engine tables in a background.", 0) \ + DECLARE(UInt64, background_fetches_pool_size, 16, "The maximum number of threads that will be used for fetching data parts from another replica for *MergeTree-engine tables in a background.", 0) \ + DECLARE(UInt64, background_common_pool_size, 8, "The maximum number of threads that will be used for performing a variety of operations (mostly garbage collection) for *MergeTree-engine tables in a background.", 0) \ + DECLARE(UInt64, background_buffer_flush_schedule_pool_size, 16, "The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in a background.", 0) \ + DECLARE(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ + DECLARE(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ + DECLARE(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ + DECLARE(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \ + DECLARE(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \ + DECLARE(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. 
Queries to not yet loaded entity will be blocked until load is finished.", 0) \ + DECLARE(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ + DECLARE(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + DECLARE(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \ + DECLARE(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ + DECLARE(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ + DECLARE(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ + DECLARE(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ + DECLARE(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. 
You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + DECLARE(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + DECLARE(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + DECLARE(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \ + DECLARE(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ + DECLARE(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \ + DECLARE(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. 
Zero means number of threads equal number of cores.", 0) \ + DECLARE(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \ + DECLARE(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ + DECLARE(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ + DECLARE(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ + DECLARE(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ + DECLARE(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \ + DECLARE(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ + DECLARE(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \ + DECLARE(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \ + DECLARE(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the storages connections.", 0) \ + DECLARE(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. 
The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + DECLARE(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + DECLARE(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + DECLARE(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + DECLARE(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + DECLARE(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \ + DECLARE(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \ + DECLARE(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \ + DECLARE(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \ + DECLARE(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. 
It can be helpful if some startup scripts depend on these tables.", 0) \ + DECLARE(Double, gwp_asan_force_sample_probability, 0.0003, "Probability that an allocation from specific places will be sampled by GWP Asan (i.e. PODArray allocations)", 0) \ + DECLARE(UInt64, config_reload_interval_ms, 2000, "How often clickhouse will reload config and check for new changes", 0) \ + DECLARE(UInt64, memory_worker_period_ms, 0, "Tick period of background memory worker which corrects memory tracker memory usages and cleans up unused pages during higher memory usage. If set to 0, default value will be used depending on the memory usage source", 0) \ + DECLARE(Bool, disable_insertion_and_mutation, false, "Disable all insert/alter/delete queries. This setting will be enabled if someone needs read-only nodes to prevent insertion and mutation affect reading performance.", 0) \ + DECLARE(UInt64, parts_kill_delay_period, 30, "Period to completely remove parts for SharedMergeTree. Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \ + DECLARE(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. 
Please submit any issues you encounter with the new implementation.", 0) \ /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in dumpToSystemServerSettingsColumns below diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 307cc5b9182..1790697d03e 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,14 +49,14 @@ namespace ErrorCodes // clang-format off #if defined(__CLION_IDE__) /// CLion freezes for a minute every time it processes this -#define COMMON_SETTINGS(M, ALIAS) -#define OBSOLETE_SETTINGS(M, ALIAS) +#define COMMON_SETTINGS(DECLARE, ALIAS) +#define OBSOLETE_SETTINGS(DECLARE, ALIAS) #else -#define COMMON_SETTINGS(M, ALIAS) \ - M(Dialect, dialect, Dialect::clickhouse, R"( +#define COMMON_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Dialect, dialect, Dialect::clickhouse, R"( Which dialect will be used to parse query )", 0)\ - M(UInt64, min_compress_block_size, 65536, R"( + DECLARE(UInt64, min_compress_block_size, 65536, R"( For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536. The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark. @@ -71,7 +71,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: )", 0) \ - M(UInt64, max_compress_block_size, 1048576, R"( + DECLARE(UInt64, max_compress_block_size, 1048576, R"( The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). 
Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. :::note @@ -80,14 +80,14 @@ This is an expert-level setting, and you shouldn't change it if you're just gett Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table). )", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, R"( + DECLARE(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, R"( In ClickHouse, data is processed by blocks, which are sets of column parts. The internal processing cycles for a single block are efficient but there are noticeable costs when processing each block. The `max_block_size` setting indicates the recommended maximum number of rows to include in a single block when loading data from tables. Blocks the size of `max_block_size` are not always loaded from the table: if ClickHouse determines that less data needs to be retrieved, a smaller block is processed. The block size should not be too small to avoid noticeable costs when processing each block. It should also not be too large to ensure that queries with a LIMIT clause execute quickly after processing the first block. When setting `max_block_size`, the goal should be to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. )", 0) \ - M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, R"( + DECLARE(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, R"( The size of blocks (in a count of rows) to form for insertion into a table. This setting only applies in cases when the server forms the blocks. For example, for an INSERT via the HTTP interface, the server parses the data format and forms blocks of the specified size. 
@@ -96,7 +96,7 @@ The setting also does not have a purpose when using INSERT SELECT, since data is The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allow sorting more data in RAM. )", 0) \ - M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, R"( + DECLARE(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, R"( Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -104,7 +104,7 @@ Possible values: - Positive integer. - 0 — Squashing disabled. )", 0) \ - M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), R"( + DECLARE(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), R"( Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. Possible values: @@ -112,7 +112,7 @@ Possible values: - Positive integer. - 0 — Squashing disabled. )", 0) \ - M(UInt64, min_insert_block_size_rows_for_materialized_views, 0, R"( + DECLARE(UInt64, min_insert_block_size_rows_for_materialized_views, 0, R"( Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. 
Possible values: @@ -124,7 +124,7 @@ Possible values: - [min_insert_block_size_rows](#min-insert-block-size-rows) )", 0) \ - M(UInt64, min_insert_block_size_bytes_for_materialized_views, 0, R"( + DECLARE(UInt64, min_insert_block_size_bytes_for_materialized_views, 0, R"( Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. Possible values: @@ -136,16 +136,16 @@ Possible values: - [min_insert_block_size_bytes](#min-insert-block-size-bytes) )", 0) \ - M(UInt64, min_external_table_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, R"( + DECLARE(UInt64, min_external_table_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, R"( Squash blocks passed to external table to specified size in rows, if blocks are not big enough. )", 0) \ - M(UInt64, min_external_table_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), R"( + DECLARE(UInt64, min_external_table_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), R"( Squash blocks passed to the external table to a specified size in bytes, if blocks are not big enough. )", 0) \ - M(UInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, R"( + DECLARE(UInt64, max_joined_block_size_rows, DEFAULT_BLOCK_SIZE, R"( Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited. )", 0) \ - M(UInt64, max_insert_threads, 0, R"( + DECLARE(UInt64, max_insert_threads, 0, R"( The maximum number of threads to execute the `INSERT SELECT` query. Possible values: @@ -158,10 +158,10 @@ Cloud default value: from `2` to `4`, depending on the service size. Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting. 
Higher values will lead to higher memory usage. )", 0) \ - M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, R"( + DECLARE(UInt64, max_insert_delayed_streams_for_parallel_write, 0, R"( The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise) )", 0) \ - M(MaxThreads, max_final_threads, 0, R"( + DECLARE(MaxThreads, max_final_threads, 0, R"( Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier. Possible values: @@ -169,10 +169,10 @@ Possible values: - Positive integer. - 0 or 1 — Disabled. `SELECT` queries are executed in a single thread. )", 0) \ - M(UInt64, max_threads_for_indexes, 0, R"( + DECLARE(UInt64, max_threads_for_indexes, 0, R"( The maximum number of threads process indices. )", 0) \ - M(MaxThreads, max_threads, 0, R"( + DECLARE(MaxThreads, max_threads, 0, R"( The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max_distributed_connections’ parameter). This parameter applies to threads that perform the same stages of the query processing pipeline in parallel. @@ -182,33 +182,33 @@ For queries that are completed quickly because of a LIMIT, you can set a lower The smaller the `max_threads` value, the less memory is consumed. )", 0) \ - M(Bool, use_concurrency_control, true, R"( + DECLARE(Bool, use_concurrency_control, true, R"( Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests). 
)", 0) \ - M(MaxThreads, max_download_threads, 4, R"( + DECLARE(MaxThreads, max_download_threads, 4, R"( The maximum number of threads to download data (e.g. for URL engine). )", 0) \ - M(MaxThreads, max_parsing_threads, 0, R"( + DECLARE(MaxThreads, max_parsing_threads, 0, R"( The maximum number of threads to parse data in input formats that support parallel parsing. By default, it is determined automatically )", 0) \ - M(UInt64, max_download_buffer_size, 10*1024*1024, R"( + DECLARE(UInt64, max_download_buffer_size, 10*1024*1024, R"( The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread. )", 0) \ - M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, R"( + DECLARE(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, R"( The maximum size of the buffer to read from the filesystem. )", 0) \ - M(UInt64, max_read_buffer_size_local_fs, 128*1024, R"( + DECLARE(UInt64, max_read_buffer_size_local_fs, 128*1024, R"( The maximum size of the buffer to read from local filesystem. If set to 0 then max_read_buffer_size will be used. )", 0) \ - M(UInt64, max_read_buffer_size_remote_fs, 0, R"( + DECLARE(UInt64, max_read_buffer_size_remote_fs, 0, R"( The maximum size of the buffer to read from remote filesystem. If set to 0 then max_read_buffer_size will be used. )", 0) \ - M(UInt64, max_distributed_connections, 1024, R"( + DECLARE(UInt64, max_distributed_connections, 1024, R"( The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. 
)", 0) \ - M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, R"( + DECLARE(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, R"( The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction. @@ -216,38 +216,38 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa `max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed. ::: )", 0) \ - M(UInt64, interactive_delay, 100000, R"( + DECLARE(UInt64, interactive_delay, 100000, R"( The interval in microseconds for checking whether request execution has been canceled and sending the progress. )", 0) \ - M(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, R"( + DECLARE(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, R"( Connection timeout if there are no replicas. )", 0) \ - M(Milliseconds, handshake_timeout_ms, 10000, R"( + DECLARE(Milliseconds, handshake_timeout_ms, 10000, R"( Timeout in milliseconds for receiving Hello packet from replicas during handshake. )", 0) \ - M(Milliseconds, connect_timeout_with_failover_ms, 1000, R"( + DECLARE(Milliseconds, connect_timeout_with_failover_ms, 1000, R"( The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the ‘shard’ and ‘replica’ sections are used in the cluster definition. If unsuccessful, several attempts are made to connect to various replicas. )", 0) \ - M(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, R"( + DECLARE(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, R"( Connection timeout for selecting first healthy replica (for secure connections). 
)", 0) \ - M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, R"( + DECLARE(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, R"( Timeout for receiving data from the network, in seconds. If no bytes were received in this interval, the exception is thrown. If you set this setting on the client, the 'send_timeout' for the socket will also be set on the corresponding connection end on the server. )", 0) \ - M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, R"( + DECLARE(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, R"( Timeout for sending data to the network, in seconds. If a client needs to send some data but is not able to send any bytes in this interval, the exception is thrown. If you set this setting on the client, the 'receive_timeout' for the socket will also be set on the corresponding connection end on the server. )", 0) \ - M(Seconds, tcp_keep_alive_timeout, DEFAULT_TCP_KEEP_ALIVE_TIMEOUT /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, R"( + DECLARE(Seconds, tcp_keep_alive_timeout, DEFAULT_TCP_KEEP_ALIVE_TIMEOUT /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, R"( The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes )", 0) \ - M(Milliseconds, hedged_connection_timeout_ms, 50, R"( + DECLARE(Milliseconds, hedged_connection_timeout_ms, 50, R"( Connection timeout for establishing connection with replica for Hedged requests )", 0) \ - M(Milliseconds, receive_data_timeout_ms, 2000, R"( + DECLARE(Milliseconds, receive_data_timeout_ms, 2000, R"( Connection timeout for receiving first packet of data or packet with positive progress from replica )", 0) \ - M(Bool, use_hedged_requests, true, R"( + DECLARE(Bool, use_hedged_requests, true, R"( Enables hedged requests logic for remote queries. It allows to establish many connections with different replicas for query. 
New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout` or no data was received within `receive_data_timeout`. Query uses the first connection which send non empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`); @@ -257,14 +257,14 @@ Enabled by default. Disabled by default on Cloud. )", 0) \ - M(Bool, allow_changing_replica_until_first_data_packet, false, R"( + DECLARE(Bool, allow_changing_replica_until_first_data_packet, false, R"( If it's enabled, in hedged requests we can start new connection until receiving first data packet even if we have already made some progress (but progress haven't updated for `receive_data_timeout` timeout), otherwise we disable changing replica after the first time we made progress. )", 0) \ - M(Milliseconds, queue_max_wait_ms, 0, R"( + DECLARE(Milliseconds, queue_max_wait_ms, 0, R"( The wait time in the request queue, if the number of concurrent requests exceeds the maximum. )", 0) \ - M(Milliseconds, connection_pool_max_wait_ms, 0, R"( + DECLARE(Milliseconds, connection_pool_max_wait_ms, 0, R"( The wait time in milliseconds for a connection when the connection pool is full. Possible values: @@ -272,7 +272,7 @@ Possible values: - Positive integer. - 0 — Infinite timeout. )", 0) \ - M(Milliseconds, replace_running_query_max_wait_ms, 5000, R"( + DECLARE(Milliseconds, replace_running_query_max_wait_ms, 5000, R"( The wait time for running the query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active. Possible values: @@ -280,7 +280,7 @@ Possible values: - Positive integer. - 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the same `query_id`. 
)", 0) \ - M(Milliseconds, kafka_max_wait_ms, 5000, R"( + DECLARE(Milliseconds, kafka_max_wait_ms, 5000, R"( The wait time in milliseconds for reading messages from [Kafka](../../engines/table-engines/integrations/kafka.md/#kafka) before retry. Possible values: @@ -292,130 +292,130 @@ See also: - [Apache Kafka](https://kafka.apache.org/) )", 0) \ - M(Milliseconds, rabbitmq_max_wait_ms, 5000, R"( + DECLARE(Milliseconds, rabbitmq_max_wait_ms, 5000, R"( The wait time for reading from RabbitMQ before retry. )", 0) \ - M(UInt64, poll_interval, DBMS_DEFAULT_POLL_INTERVAL, R"( + DECLARE(UInt64, poll_interval, DBMS_DEFAULT_POLL_INTERVAL, R"( Block at the query wait loop on the server for the specified number of seconds. )", 0) \ - M(UInt64, idle_connection_timeout, 3600, R"( + DECLARE(UInt64, idle_connection_timeout, 3600, R"( Timeout to close idle TCP connections after specified number of seconds. Possible values: - Positive integer (0 - close immediately, after 0 seconds). )", 0) \ - M(UInt64, distributed_connections_pool_size, 1024, R"( + DECLARE(UInt64, distributed_connections_pool_size, 1024, R"( The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. )", 0) \ - M(UInt64, connections_with_failover_max_tries, 3, R"( + DECLARE(UInt64, connections_with_failover_max_tries, 3, R"( The maximum number of connection attempts with each replica for the Distributed table engine. )", 0) \ - M(UInt64, s3_strict_upload_part_size, S3::DEFAULT_STRICT_UPLOAD_PART_SIZE, R"( + DECLARE(UInt64, s3_strict_upload_part_size, S3::DEFAULT_STRICT_UPLOAD_PART_SIZE, R"( The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts). 
)", 0) \ - M(UInt64, azure_strict_upload_part_size, 0, R"( + DECLARE(UInt64, azure_strict_upload_part_size, 0, R"( The exact size of part to upload during multipart upload to Azure blob storage. )", 0) \ - M(UInt64, azure_max_blocks_in_multipart_upload, 50000, R"( + DECLARE(UInt64, azure_max_blocks_in_multipart_upload, 50000, R"( Maximum number of blocks in multipart upload for Azure. )", 0) \ - M(UInt64, s3_min_upload_part_size, S3::DEFAULT_MIN_UPLOAD_PART_SIZE, R"( + DECLARE(UInt64, s3_min_upload_part_size, S3::DEFAULT_MIN_UPLOAD_PART_SIZE, R"( The minimum size of part to upload during multipart upload to S3. )", 0) \ - M(UInt64, s3_max_upload_part_size, S3::DEFAULT_MAX_UPLOAD_PART_SIZE, R"( + DECLARE(UInt64, s3_max_upload_part_size, S3::DEFAULT_MAX_UPLOAD_PART_SIZE, R"( The maximum size of part to upload during multipart upload to S3. )", 0) \ - M(UInt64, azure_min_upload_part_size, 16*1024*1024, R"( + DECLARE(UInt64, azure_min_upload_part_size, 16*1024*1024, R"( The minimum size of part to upload during multipart upload to Azure blob storage. )", 0) \ - M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, R"( + DECLARE(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, R"( The maximum size of part to upload during multipart upload to Azure blob storage. )", 0) \ - M(UInt64, s3_upload_part_size_multiply_factor, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR, R"( + DECLARE(UInt64, s3_upload_part_size_multiply_factor, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR, R"( Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3. 
)", 0) \ - M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD, R"( + DECLARE(UInt64, s3_upload_part_size_multiply_parts_count_threshold, S3::DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD, R"( Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor. )", 0) \ - M(UInt64, s3_max_part_number, S3::DEFAULT_MAX_PART_NUMBER, R"( + DECLARE(UInt64, s3_max_part_number, S3::DEFAULT_MAX_PART_NUMBER, R"( Maximum part number number for s3 upload part. )", 0) \ - M(UInt64, s3_max_single_operation_copy_size, S3::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE, R"( + DECLARE(UInt64, s3_max_single_operation_copy_size, S3::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE, R"( Maximum size for a single copy operation in s3 )", 0) \ - M(UInt64, azure_upload_part_size_multiply_factor, 2, R"( + DECLARE(UInt64, azure_upload_part_size_multiply_factor, 2, R"( Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage. )", 0) \ - M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, R"( + DECLARE(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, R"( Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor. )", 0) \ - M(UInt64, s3_max_inflight_parts_for_one_file, S3::DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE, R"( + DECLARE(UInt64, s3_max_inflight_parts_for_one_file, S3::DEFAULT_MAX_INFLIGHT_PARTS_FOR_ONE_FILE, R"( The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. )", 0) \ - M(UInt64, azure_max_inflight_parts_for_one_file, 20, R"( + DECLARE(UInt64, azure_max_inflight_parts_for_one_file, 20, R"( The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. 
)", 0) \ - M(UInt64, s3_max_single_part_upload_size, S3::DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE, R"( + DECLARE(UInt64, s3_max_single_part_upload_size, S3::DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE, R"( The maximum size of object to upload using singlepart upload to S3. )", 0) \ - M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, R"( + DECLARE(UInt64, azure_max_single_part_upload_size, 100*1024*1024, R"( The maximum size of object to upload using singlepart upload to Azure blob storage. )", 0) \ - M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, R"( + DECLARE(UInt64, azure_max_single_part_copy_size, 256*1024*1024, R"( The maximum size of object to copy using single part copy to Azure blob storage. )", 0) \ - M(UInt64, s3_max_single_read_retries, S3::DEFAULT_MAX_SINGLE_READ_TRIES, R"( + DECLARE(UInt64, s3_max_single_read_retries, S3::DEFAULT_MAX_SINGLE_READ_TRIES, R"( The maximum number of retries during single S3 read. )", 0) \ - M(UInt64, azure_max_single_read_retries, 4, R"( + DECLARE(UInt64, azure_max_single_read_retries, 4, R"( The maximum number of retries during single Azure blob storage read. )", 0) \ - M(UInt64, azure_max_unexpected_write_error_retries, 4, R"( + DECLARE(UInt64, azure_max_unexpected_write_error_retries, 4, R"( The maximum number of retries in case of unexpected errors during Azure blob storage write )", 0) \ - M(UInt64, s3_max_unexpected_write_error_retries, S3::DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES, R"( + DECLARE(UInt64, s3_max_unexpected_write_error_retries, S3::DEFAULT_MAX_UNEXPECTED_WRITE_ERROR_RETRIES, R"( The maximum number of retries in case of unexpected errors during S3 write. )", 0) \ - M(UInt64, s3_max_redirects, S3::DEFAULT_MAX_REDIRECTS, R"( + DECLARE(UInt64, s3_max_redirects, S3::DEFAULT_MAX_REDIRECTS, R"( Max number of S3 redirects hops allowed. 
)", 0) \ - M(UInt64, s3_max_connections, S3::DEFAULT_MAX_CONNECTIONS, R"( + DECLARE(UInt64, s3_max_connections, S3::DEFAULT_MAX_CONNECTIONS, R"( The maximum number of connections per server. )", 0) \ - M(UInt64, s3_max_get_rps, 0, R"( + DECLARE(UInt64, s3_max_get_rps, 0, R"( Limit on S3 GET request per second rate before throttling. Zero means unlimited. )", 0) \ - M(UInt64, s3_max_get_burst, 0, R"( + DECLARE(UInt64, s3_max_get_burst, 0, R"( Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps` )", 0) \ - M(UInt64, s3_max_put_rps, 0, R"( + DECLARE(UInt64, s3_max_put_rps, 0, R"( Limit on S3 PUT request per second rate before throttling. Zero means unlimited. )", 0) \ - M(UInt64, s3_max_put_burst, 0, R"( + DECLARE(UInt64, s3_max_put_burst, 0, R"( Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps` )", 0) \ - M(UInt64, s3_list_object_keys_size, S3::DEFAULT_LIST_OBJECT_KEYS_SIZE, R"( + DECLARE(UInt64, s3_list_object_keys_size, S3::DEFAULT_LIST_OBJECT_KEYS_SIZE, R"( Maximum number of files that could be returned in batch by ListObject request )", 0) \ - M(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, R"( + DECLARE(Bool, s3_use_adaptive_timeouts, S3::DEFAULT_USE_ADAPTIVE_TIMEOUTS, R"( When set to `true` than for all s3 requests first two attempts are made with low send and receive timeouts. When set to `false` than all attempts are made with identical timeouts. )", 0) \ - M(UInt64, azure_list_object_keys_size, 1000, R"( + DECLARE(UInt64, azure_list_object_keys_size, 1000, R"( Maximum number of files that could be returned in batch by ListObject request )", 0) \ - M(Bool, s3_truncate_on_insert, false, R"( + DECLARE(Bool, s3_truncate_on_insert, false, R"( Enables or disables truncate before inserts in s3 engine tables. 
If disabled, an exception will be thrown on insert attempts if an S3 object already exists. Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query replaces existing content of the file with the new data. )", 0) \ - M(Bool, azure_truncate_on_insert, false, R"( + DECLARE(Bool, azure_truncate_on_insert, false, R"( Enables or disables truncate before insert in azure engine tables. )", 0) \ - M(Bool, s3_create_new_file_on_insert, false, R"( + DECLARE(Bool, s3_create_new_file_on_insert, false, R"( Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern: initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. @@ -424,111 +424,111 @@ Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query creates a new file. )", 0) \ - M(Bool, s3_skip_empty_files, false, R"( + DECLARE(Bool, s3_skip_empty_files, false, R"( Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables. Possible values: - 0 — `SELECT` throws an exception if empty file is not compatible with requested format. - 1 — `SELECT` returns empty result for empty file. )", 0) \ - M(Bool, azure_create_new_file_on_insert, false, R"( + DECLARE(Bool, azure_create_new_file_on_insert, false, R"( Enables or disables creating a new file on each insert in azure engine tables )", 0) \ - M(Bool, s3_check_objects_after_upload, false, R"( + DECLARE(Bool, s3_check_objects_after_upload, false, R"( Check each uploaded object to s3 with head request to be sure that upload was successful )", 0) \ - M(Bool, s3_allow_parallel_part_upload, true, R"( + DECLARE(Bool, s3_allow_parallel_part_upload, true, R"( Use multiple threads for s3 multipart upload. 
It may lead to slightly higher memory usage )", 0) \ - M(Bool, azure_allow_parallel_part_upload, true, R"( + DECLARE(Bool, azure_allow_parallel_part_upload, true, R"( Use multiple threads for azure multipart upload. )", 0) \ - M(Bool, s3_throw_on_zero_files_match, false, R"( + DECLARE(Bool, s3_throw_on_zero_files_match, false, R"( Throw an error, when ListObjects request cannot match any files )", 0) \ - M(Bool, hdfs_throw_on_zero_files_match, false, R"( + DECLARE(Bool, hdfs_throw_on_zero_files_match, false, R"( Throw an error if matched zero files according to glob expansion rules. Possible values: - 1 — `SELECT` throws an exception. - 0 — `SELECT` returns empty result. )", 0) \ - M(Bool, azure_throw_on_zero_files_match, false, R"( + DECLARE(Bool, azure_throw_on_zero_files_match, false, R"( Throw an error if matched zero files according to glob expansion rules. Possible values: - 1 — `SELECT` throws an exception. - 0 — `SELECT` returns empty result. )", 0) \ - M(Bool, s3_ignore_file_doesnt_exist, false, R"( + DECLARE(Bool, s3_ignore_file_doesnt_exist, false, R"( Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. - 0 — `SELECT` throws an exception. )", 0) \ - M(Bool, hdfs_ignore_file_doesnt_exist, false, R"( + DECLARE(Bool, hdfs_ignore_file_doesnt_exist, false, R"( Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. - 0 — `SELECT` throws an exception. )", 0) \ - M(Bool, azure_ignore_file_doesnt_exist, false, R"( + DECLARE(Bool, azure_ignore_file_doesnt_exist, false, R"( Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. - 0 — `SELECT` throws an exception. 
)", 0) \ - M(UInt64, azure_sdk_max_retries, 10, R"( + DECLARE(UInt64, azure_sdk_max_retries, 10, R"( Maximum number of retries in azure sdk )", 0) \ - M(UInt64, azure_sdk_retry_initial_backoff_ms, 10, R"( + DECLARE(UInt64, azure_sdk_retry_initial_backoff_ms, 10, R"( Minimal backoff between retries in azure sdk )", 0) \ - M(UInt64, azure_sdk_retry_max_backoff_ms, 1000, R"( + DECLARE(UInt64, azure_sdk_retry_max_backoff_ms, 1000, R"( Maximal backoff between retries in azure sdk )", 0) \ - M(Bool, s3_validate_request_settings, true, R"( + DECLARE(Bool, s3_validate_request_settings, true, R"( Enables s3 request settings validation. Possible values: - 1 — validate settings. - 0 — do not validate settings. )", 0) \ - M(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, R"( + DECLARE(Bool, s3_disable_checksum, S3::DEFAULT_DISABLE_CHECKSUM, R"( Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth. )", 0) \ - M(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, R"( + DECLARE(UInt64, s3_retry_attempts, S3::DEFAULT_RETRY_ATTEMPTS, R"( Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries )", 0) \ - M(UInt64, s3_request_timeout_ms, S3::DEFAULT_REQUEST_TIMEOUT_MS, R"( + DECLARE(UInt64, s3_request_timeout_ms, S3::DEFAULT_REQUEST_TIMEOUT_MS, R"( Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long. )", 0) \ - M(UInt64, s3_connect_timeout_ms, S3::DEFAULT_CONNECT_TIMEOUT_MS, R"( + DECLARE(UInt64, s3_connect_timeout_ms, S3::DEFAULT_CONNECT_TIMEOUT_MS, R"( Connection timeout for host from s3 disks. 
)", 0) \ - M(Bool, enable_s3_requests_logging, false, R"( + DECLARE(Bool, enable_s3_requests_logging, false, R"( Enable very explicit logging of S3 requests. Makes sense for debug only. )", 0) \ - M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", R"( + DECLARE(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", R"( Default zookeeper path prefix for S3Queue engine )", 0) \ - M(Bool, s3queue_enable_logging_to_s3queue_log, false, R"( + DECLARE(Bool, s3queue_enable_logging_to_s3queue_log, false, R"( Enable writing to system.s3queue_log. The value can be overwritten per table with table settings )", 0) \ - M(UInt64, hdfs_replication, 0, R"( + DECLARE(UInt64, hdfs_replication, 0, R"( The actual number of replications can be specified when the hdfs file is created. )", 0) \ - M(Bool, hdfs_truncate_on_insert, false, R"( + DECLARE(Bool, hdfs_truncate_on_insert, false, R"( Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query replaces existing content of the file with the new data. )", 0) \ - M(Bool, hdfs_create_new_file_on_insert, false, R"( + DECLARE(Bool, hdfs_create_new_file_on_insert, false, R"( Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern: initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. @@ -537,34 +537,34 @@ Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query creates a new file. )", 0) \ - M(Bool, hdfs_skip_empty_files, false, R"( + DECLARE(Bool, hdfs_skip_empty_files, false, R"( Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables. 
Possible values: - 0 — `SELECT` throws an exception if empty file is not compatible with requested format. - 1 — `SELECT` returns empty result for empty file. )", 0) \ - M(Bool, azure_skip_empty_files, false, R"( + DECLARE(Bool, azure_skip_empty_files, false, R"( Enables or disables skipping empty files in S3 engine. Possible values: - 0 — `SELECT` throws an exception if empty file is not compatible with requested format. - 1 — `SELECT` returns empty result for empty file. )", 0) \ - M(UInt64, hsts_max_age, 0, R"( + DECLARE(UInt64, hsts_max_age, 0, R"( Expired time for HSTS. 0 means disable HSTS. )", 0) \ - M(Bool, extremes, false, R"( + DECLARE(Bool, extremes, false, R"( Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). For more information, see the section “Extreme values”. )", IMPORTANT) \ - M(Bool, use_uncompressed_cache, false, R"( + DECLARE(Bool, use_uncompressed_cache, false, R"( Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the ‘use_uncompressed_cache’ setting always set to 1. 
)", 0) \ - M(Bool, replace_running_query, false, R"( + DECLARE(Bool, replace_running_query, false, R"( When using the HTTP interface, the ‘query_id’ parameter can be passed. This is any string that serves as the query identifier. If a query from the same user with the same ‘query_id’ already exists at this time, the behaviour depends on the ‘replace_running_query’ parameter. @@ -574,28 +574,28 @@ If a query from the same user with the same ‘query_id’ already exists at thi Set this parameter to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn’t finished yet, it should be cancelled. )", 0) \ - M(UInt64, max_remote_read_network_bandwidth, 0, R"( + DECLARE(UInt64, max_remote_read_network_bandwidth, 0, R"( The maximum speed of data exchange over the network in bytes per second for read. )", 0) \ - M(UInt64, max_remote_write_network_bandwidth, 0, R"( + DECLARE(UInt64, max_remote_write_network_bandwidth, 0, R"( The maximum speed of data exchange over the network in bytes per second for write. )", 0) \ - M(UInt64, max_local_read_bandwidth, 0, R"( + DECLARE(UInt64, max_local_read_bandwidth, 0, R"( The maximum speed of local reads in bytes per second. )", 0) \ - M(UInt64, max_local_write_bandwidth, 0, R"( + DECLARE(UInt64, max_local_write_bandwidth, 0, R"( The maximum speed of local writes in bytes per second. )", 0) \ - M(Bool, stream_like_engine_allow_direct_select, false, R"( + DECLARE(Bool, stream_like_engine_allow_direct_select, false, R"( Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams, and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled. )", 0) \ - M(String, stream_like_engine_insert_queue, "", R"( + DECLARE(String, stream_like_engine_insert_queue, "", R"( When stream-like engine reads from multiple queues, the user will need to select one queue to insert into when writing. Used by Redis Streams and NATS. 
)", 0) \ - M(Bool, dictionary_validate_primary_key_type, false, R"( + DECLARE(Bool, dictionary_validate_primary_key_type, false, R"( Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64. )", 0) \ - M(Bool, distributed_insert_skip_read_only_replicas, false, R"( + DECLARE(Bool, distributed_insert_skip_read_only_replicas, false, R"( Enables skipping read-only replicas for INSERT queries into Distributed. Possible values: @@ -603,7 +603,7 @@ Possible values: - 0 — INSERT was as usual, if it will go to read-only replica it will fail - 1 — Initiator will skip read-only replicas before sending data to shards. )", 0) \ - M(Bool, distributed_foreground_insert, false, R"( + DECLARE(Bool, distributed_foreground_insert, false, R"( Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table. By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in background mode. When `distributed_foreground_insert=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true). @@ -620,17 +620,17 @@ Cloud default value: `1`. - [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed) - [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed) )", 0) ALIAS(insert_distributed_sync) \ - M(UInt64, distributed_background_insert_timeout, 0, R"( + DECLARE(UInt64, distributed_background_insert_timeout, 0, R"( Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout. 
)", 0) ALIAS(insert_distributed_timeout) \ - M(Milliseconds, distributed_background_insert_sleep_time_ms, 100, R"( + DECLARE(Milliseconds, distributed_background_insert_sleep_time_ms, 100, R"( Base interval for the [Distributed](../../engines/table-engines/special/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors. Possible values: - A positive integer number of milliseconds. )", 0) ALIAS(distributed_directory_monitor_sleep_time_ms) \ - M(Milliseconds, distributed_background_insert_max_sleep_time_ms, 30000, R"( + DECLARE(Milliseconds, distributed_background_insert_max_sleep_time_ms, 30000, R"( Maximum interval for the [Distributed](../../engines/table-engines/special/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_background_insert_sleep_time_ms](#distributed_background_insert_sleep_time_ms) setting. Possible values: @@ -638,7 +638,7 @@ Possible values: - A positive integer number of milliseconds. )", 0) ALIAS(distributed_directory_monitor_max_sleep_time_ms) \ \ - M(Bool, distributed_background_insert_batch, false, R"( + DECLARE(Bool, distributed_background_insert_batch, false, R"( Enables/disables inserted data sending in batches. When batch sending is enabled, the [Distributed](../../engines/table-engines/special/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources. @@ -648,7 +648,7 @@ Possible values: - 1 — Enabled. - 0 — Disabled. )", 0) ALIAS(distributed_directory_monitor_batch_inserts) \ - M(Bool, distributed_background_insert_split_batch_on_failure, false, R"( + DECLARE(Bool, distributed_background_insert_split_batch_on_failure, false, R"( Enables/disables splitting batches on failures. 
Sometimes sending particular batch to the remote shard may fail, because of some complex pipeline after (i.e. `MATERIALIZED VIEW` with `GROUP BY`) due to `Memory limit exceeded` or similar errors. In this case, retrying will not help (and this will stuck distributed sends for the table) but sending files from that batch one by one may succeed INSERT. @@ -669,7 +669,7 @@ You should not rely on automatic batch splitting, since this may hurt performanc ::: )", 0) ALIAS(distributed_directory_monitor_split_batch_on_failure) \ \ - M(Bool, optimize_move_to_prewhere, true, R"( + DECLARE(Bool, optimize_move_to_prewhere, true, R"( Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries. Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables. @@ -679,7 +679,7 @@ Possible values: - 0 — Automatic `PREWHERE` optimization is disabled. - 1 — Automatic `PREWHERE` optimization is enabled. )", 0) \ - M(Bool, optimize_move_to_prewhere_if_final, false, R"( + DECLARE(Bool, optimize_move_to_prewhere_if_final, false, R"( Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier. Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables. 
@@ -693,20 +693,20 @@ Possible values: - [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting )", 0) \ - M(Bool, move_all_conditions_to_prewhere, true, R"( + DECLARE(Bool, move_all_conditions_to_prewhere, true, R"( Move all viable conditions from WHERE to PREWHERE )", 0) \ - M(Bool, enable_multiple_prewhere_read_steps, true, R"( + DECLARE(Bool, enable_multiple_prewhere_read_steps, true, R"( Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND )", 0) \ - M(Bool, move_primary_key_columns_to_end_of_prewhere, true, R"( + DECLARE(Bool, move_primary_key_columns_to_end_of_prewhere, true, R"( Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering. )", 0) \ - M(Bool, allow_reorder_prewhere_conditions, true, R"( + DECLARE(Bool, allow_reorder_prewhere_conditions, true, R"( When moving conditions from WHERE to PREWHERE, allow reordering them to optimize filtering )", 0) \ \ - M(UInt64, alter_sync, 1, R"( + DECLARE(UInt64, alter_sync, 1, R"( Allows to set up waiting for actions to be executed on replicas by [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries. Possible values: @@ -721,7 +721,7 @@ Cloud default value: `0`. `alter_sync` is applicable to `Replicated` tables only, it does nothing to alters of not `Replicated` tables. 
::: )", 0) ALIAS(replication_alter_partitions_sync) \ - M(Int64, replication_wait_for_inactive_replica_timeout, 120, R"( + DECLARE(Int64, replication_wait_for_inactive_replica_timeout, 120, R"( Specifies how long (in seconds) to wait for inactive replicas to execute [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries. Possible values: @@ -730,11 +730,11 @@ Possible values: - Negative integer — Wait for unlimited time. - Positive integer — The number of seconds to wait. )", 0) \ - M(Bool, alter_move_to_space_execute_async, false, R"( + DECLARE(Bool, alter_move_to_space_execute_async, false, R"( Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously )", 0) \ \ - M(LoadBalancing, load_balancing, LoadBalancing::RANDOM, R"( + DECLARE(LoadBalancing, load_balancing, LoadBalancing::RANDOM, R"( Specifies the algorithm of replicas selection that is used for distributed query processing. ClickHouse supports the following algorithms of choosing replicas: @@ -821,20 +821,20 @@ load_balancing = round_robin This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted). )", 0) \ - M(UInt64, load_balancing_first_offset, 0, R"( + DECLARE(UInt64, load_balancing_first_offset, 0, R"( Which replica to preferably send a query when FIRST_OR_RANDOM load balancing strategy is used. )", 0) \ \ - M(TotalsMode, totals_mode, TotalsMode::AFTER_HAVING_EXCLUSIVE, R"( + DECLARE(TotalsMode, totals_mode, TotalsMode::AFTER_HAVING_EXCLUSIVE, R"( How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = ‘any’ are present. See the section “WITH TOTALS modifier”. )", IMPORTANT) \ - M(Float, totals_auto_threshold, 0.5, R"( + DECLARE(Float, totals_auto_threshold, 0.5, R"( The threshold for `totals_mode = 'auto'`. 
See the section “WITH TOTALS modifier”. )", 0) \ \ - M(Bool, allow_suspicious_low_cardinality_types, false, R"( + DECLARE(Bool, allow_suspicious_low_cardinality_types, false, R"( Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types with fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`. For small fixed values using of `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result: @@ -850,28 +850,28 @@ Possible values: - 1 — Usage of `LowCardinality` is not restricted. - 0 — Usage of `LowCardinality` is restricted. )", 0) \ - M(Bool, allow_suspicious_fixed_string_types, false, R"( + DECLARE(Bool, allow_suspicious_fixed_string_types, false, R"( In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates a misuse )", 0) \ - M(Bool, allow_suspicious_indices, false, R"( + DECLARE(Bool, allow_suspicious_indices, false, R"( Reject primary/secondary indexes and sorting keys with identical expressions )", 0) \ - M(Bool, allow_suspicious_ttl_expressions, false, R"( + DECLARE(Bool, allow_suspicious_ttl_expressions, false, R"( Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time. )", 0) \ - M(Bool, allow_suspicious_variant_types, false, R"( + DECLARE(Bool, allow_suspicious_variant_types, false, R"( In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types. )", 0) \ - M(Bool, allow_suspicious_primary_key, false, R"( + DECLARE(Bool, allow_suspicious_primary_key, false, R"( Allow suspicious `PRIMARY KEY`/`ORDER BY` for MergeTree (i.e. SimpleAggregateFunction). 
)", 0) \ - M(Bool, compile_expressions, false, R"( + DECLARE(Bool, compile_expressions, false, R"( Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting. )", 0) \ - M(UInt64, min_count_to_compile_expression, 3, R"( + DECLARE(UInt64, min_count_to_compile_expression, 3, R"( Minimum count of executing same expression before it is get compiled. )", 0) \ - M(Bool, compile_aggregate_expressions, true, R"( + DECLARE(Bool, compile_aggregate_expressions, true, R"( Enables or disables JIT-compilation of aggregate functions to native code. Enabling this setting can improve the performance. Possible values: @@ -883,7 +883,7 @@ Possible values: - [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression) )", 0) \ - M(UInt64, min_count_to_compile_aggregate_expression, 3, R"( + DECLARE(UInt64, min_count_to_compile_aggregate_expression, 3, R"( The minimum number of identical aggregate expressions to start JIT-compilation. Works only if the [compile_aggregate_expressions](#compile_aggregate_expressions) setting is enabled. Possible values: @@ -891,28 +891,28 @@ Possible values: - Positive integer. - 0 — Identical aggregate expressions are always JIT-compiled. )", 0) \ - M(Bool, compile_sort_description, true, R"( + DECLARE(Bool, compile_sort_description, true, R"( Compile sort description to native code. )", 0) \ - M(UInt64, min_count_to_compile_sort_description, 3, R"( + DECLARE(UInt64, min_count_to_compile_sort_description, 3, R"( The number of identical sort descriptions before they are JIT-compiled )", 0) \ - M(UInt64, group_by_two_level_threshold, 100000, R"( + DECLARE(UInt64, group_by_two_level_threshold, 100000, R"( From what number of keys, a two-level aggregation starts. 0 - the threshold is not set. 
)", 0) \ - M(UInt64, group_by_two_level_threshold_bytes, 50000000, R"( + DECLARE(UInt64, group_by_two_level_threshold_bytes, 50000000, R"( From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered. )", 0) \ - M(Bool, distributed_aggregation_memory_efficient, true, R"( + DECLARE(Bool, distributed_aggregation_memory_efficient, true, R"( Is the memory-saving mode of distributed aggregation enabled. )", 0) \ - M(UInt64, aggregation_memory_efficient_merge_threads, 0, R"( + DECLARE(UInt64, aggregation_memory_efficient_merge_threads, 0, R"( Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 0 means - same as 'max_threads'. )", 0) \ - M(Bool, enable_memory_bound_merging_of_aggregation_results, true, R"( + DECLARE(Bool, enable_memory_bound_merging_of_aggregation_results, true, R"( Enable memory bound merging strategy for aggregation. )", 0) \ - M(Bool, enable_positional_arguments, true, R"( + DECLARE(Bool, enable_positional_arguments, true, R"( Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statements. 
Possible values: @@ -942,7 +942,7 @@ Result: └─────┴─────┴───────┘ ``` )", 0) \ - M(Bool, enable_extended_results_for_datetime_functions, false, R"( + DECLARE(Bool, enable_extended_results_for_datetime_functions, false, R"( Enables or disables returning results of type: - `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday). - `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot). @@ -952,10 +952,10 @@ Possible values: - 0 — Functions return `Date` or `DateTime` for all types of arguments. - 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. 
)", 0) \ - M(Bool, allow_nonconst_timezone_arguments, false, R"( + DECLARE(Bool, allow_nonconst_timezone_arguments, false, R"( Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*() )", 0) \ - M(Bool, function_locate_has_mysql_compatible_argument_order, true, R"( + DECLARE(Bool, function_locate_has_mysql_compatible_argument_order, true, R"( Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate). Possible values: @@ -964,7 +964,7 @@ Possible values: - 1 — Function `locate` accepts arguments `(needle, haystack, [, start_pos])` (MySQL-compatible behavior) )", 0) \ \ - M(Bool, group_by_use_nulls, false, R"( + DECLARE(Bool, group_by_use_nulls, false, R"( Changes the way the [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) treats the types of aggregation keys. When the `ROLLUP`, `CUBE`, or `GROUPING SETS` specifiers are used, some aggregation keys may not be used to produce some result rows. Columns for these keys are filled with either default value or `NULL` in corresponding rows depending on this setting. @@ -979,7 +979,7 @@ See also: - [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) )", 0) \ \ - M(Bool, skip_unavailable_shards, false, R"( + DECLARE(Bool, skip_unavailable_shards, false, R"( Enables or disables silently skipping of unavailable shards. Shard is considered unavailable if all its replicas are unavailable. A replica is unavailable in the following cases: @@ -1007,7 +1007,7 @@ Possible values: If a shard is unavailable, ClickHouse throws an exception. )", 0) \ \ - M(UInt64, parallel_distributed_insert_select, 0, R"( + DECLARE(UInt64, parallel_distributed_insert_select, 0, R"( Enables parallel distributed `INSERT ... SELECT` query. If we execute `INSERT INTO distributed_table_a SELECT ... 
FROM distributed_table_b` queries and both tables use the same cluster, and both tables are either [replicated](../../engines/table-engines/mergetree-family/replication.md) or non-replicated, then this query is processed locally on every shard. @@ -1018,7 +1018,7 @@ Possible values: - 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine. - 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine. )", 0) \ - M(UInt64, distributed_group_by_no_merge, 0, R"( + DECLARE(UInt64, distributed_group_by_no_merge, 0, R"( Do not merge aggregation states from different servers for distributed query processing, you can use this in case it is for certain that there are different keys on different shards Possible values: @@ -1056,7 +1056,7 @@ FORMAT PrettyCompactMonoBlock └───────┘ ``` )", 0) \ - M(UInt64, distributed_push_down_limit, 1, R"( + DECLARE(UInt64, distributed_push_down_limit, 1, R"( Enables or disables [LIMIT](#limit) applying on each shard separately. This will allow to avoid: @@ -1081,7 +1081,7 @@ See also: - [optimize_skip_unused_shards](#optimize-skip-unused-shards) - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) )", 0) \ - M(Bool, optimize_distributed_group_by_sharding_key, true, R"( + DECLARE(Bool, optimize_distributed_group_by_sharding_key, true, R"( Optimize `GROUP BY sharding_key` queries, by avoiding costly aggregation on the initiator server (which will reduce memory usage for the query on the initiator server). The following types of queries are supported (and all combinations of them): @@ -1114,12 +1114,12 @@ See also: Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key). 
::: )", 0) \ - M(UInt64, optimize_skip_unused_shards_limit, 1000, R"( + DECLARE(UInt64, optimize_skip_unused_shards_limit, 1000, R"( Limit for number of sharding key values, turns off `optimize_skip_unused_shards` if the limit is reached. Too many values may require significant amount for processing, while the benefit is doubtful, since if you have huge number of values in `IN (...)`, then most likely the query will be sent to all shards anyway. )", 0) \ - M(Bool, optimize_skip_unused_shards, false, R"( + DECLARE(Bool, optimize_skip_unused_shards, false, R"( Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by sharding key, otherwise a query yields incorrect result). Possible values: @@ -1127,7 +1127,7 @@ Possible values: - 0 — Disabled. - 1 — Enabled. )", 0) \ - M(Bool, optimize_skip_unused_shards_rewrite_in, true, R"( + DECLARE(Bool, optimize_skip_unused_shards_rewrite_in, true, R"( Rewrite IN in query for remote shards to exclude values that does not belong to the shard (requires optimize_skip_unused_shards). Possible values: @@ -1135,7 +1135,7 @@ Possible values: - 0 — Disabled. - 1 — Enabled. )", 0) \ - M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, R"( + DECLARE(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, R"( Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats with updates) functions in sharding key. Possible values: @@ -1143,7 +1143,7 @@ Possible values: - 0 — Disallowed. - 1 — Allowed. )", 0) \ - M(UInt64, force_optimize_skip_unused_shards, 0, R"( + DECLARE(UInt64, force_optimize_skip_unused_shards, 0, R"( Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. 
If the skipping is not possible and the setting is enabled, an exception will be thrown. Possible values: @@ -1152,7 +1152,7 @@ Possible values: - 1 — Enabled. Query execution is disabled only if the table has a sharding key. - 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table. )", 0) \ - M(UInt64, optimize_skip_unused_shards_nesting, 0, R"( + DECLARE(UInt64, optimize_skip_unused_shards_nesting, 0, R"( Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)) depends on the nesting level of the distributed query (case when you have `Distributed` table that look into another `Distributed` table). Possible values: @@ -1161,7 +1161,7 @@ Possible values: - 1 — Enables `optimize_skip_unused_shards` only for the first level. - 2 — Enables `optimize_skip_unused_shards` up to the second level. )", 0) \ - M(UInt64, force_optimize_skip_unused_shards_nesting, 0, R"( + DECLARE(UInt64, force_optimize_skip_unused_shards_nesting, 0, R"( Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)) depends on the nesting level of the distributed query (case when you have `Distributed` table that look into another `Distributed` table). Possible values: @@ -1171,7 +1171,7 @@ Possible values: - 2 — Enables `force_optimize_skip_unused_shards` up to the second level. )", 0) \ \ - M(Bool, input_format_parallel_parsing, true, R"( + DECLARE(Bool, input_format_parallel_parsing, true, R"( Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. 
Possible values: @@ -1179,13 +1179,13 @@ Possible values: - 1 — Enabled. - 0 — Disabled. )", 0) \ - M(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), R"( + DECLARE(UInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), R"( - Type: unsigned int - Default value: 1 MiB The minimum chunk size in bytes, which each thread will parse in parallel. )", 0) \ - M(Bool, output_format_parallel_formatting, true, R"( + DECLARE(Bool, output_format_parallel_formatting, true, R"( Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats. Possible values: @@ -1193,56 +1193,56 @@ Possible values: - 1 — Enabled. - 0 — Disabled. )", 0) \ - M(UInt64, output_format_compression_level, 3, R"( + DECLARE(UInt64, output_format_compression_level, 3, R"( Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when writing to table functions `file`, `url`, `hdfs`, `s3`, or `azureBlobStorage`. Possible values: from `1` to `22` )", 0) \ - M(UInt64, output_format_compression_zstd_window_log, 0, R"( + DECLARE(UInt64, output_format_compression_zstd_window_log, 0, R"( Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression. This can help to achieve a better compression ratio. Possible values: non-negative numbers. Note that if the value is too small or too big, `zstdlib` will throw an exception. Typical values are from `20` (window size = `1MB`) to `30` (window size = `1GB`). 
)", 0) \ - M(Bool, enable_parsing_to_custom_serialization, true, R"( + DECLARE(Bool, enable_parsing_to_custom_serialization, true, R"( If true then data can be parsed directly to columns with custom serialization (e.g. Sparse) according to hints for serialization got from the table. )", 0) \ \ - M(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), R"( + DECLARE(UInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), R"( If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads. Possible values: - Positive integer. )", 0) \ - M(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), R"( + DECLARE(UInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), R"( If the number of bytes to read from one file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads. Possible value: - Positive integer. )", 0) \ - M(UInt64, merge_tree_min_rows_for_seek, 0, R"( + DECLARE(UInt64, merge_tree_min_rows_for_seek, 0, R"( If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file but reads the data sequentially. Possible values: - Any positive integer. )", 0) \ - M(UInt64, merge_tree_min_bytes_for_seek, 0, R"( + DECLARE(UInt64, merge_tree_min_bytes_for_seek, 0, R"( If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads a range of file that contains both blocks, thus avoiding extra seek. Possible values: - Any positive integer. 
)", 0) \ - M(UInt64, merge_tree_coarse_index_granularity, 8, R"( + DECLARE(UInt64, merge_tree_coarse_index_granularity, 8, R"( When searching for data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. Possible values: - Any positive even integer. )", 0) \ - M(UInt64, merge_tree_max_rows_to_use_cache, (128 * 8192), R"( + DECLARE(UInt64, merge_tree_max_rows_to_use_cache, (128 * 8192), R"( If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it does not use the cache of uncompressed blocks. The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. @@ -1251,7 +1251,7 @@ Possible values: - Any positive integer. )", 0) \ - M(UInt64, merge_tree_max_bytes_to_use_cache, (192 * 10 * 1024 * 1024), R"( + DECLARE(UInt64, merge_tree_max_bytes_to_use_cache, (192 * 10 * 1024 * 1024), R"( If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it does not use the cache of uncompressed blocks. The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. 
@@ -1260,20 +1260,20 @@ Possible values: - Any positive integer. )", 0) \ - M(Bool, do_not_merge_across_partitions_select_final, false, R"( + DECLARE(Bool, do_not_merge_across_partitions_select_final, false, R"( Merge parts only in one partition in select final )", 0) \ - M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, R"( + DECLARE(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, R"( Split parts ranges into intersecting and non intersecting during FINAL optimization )", 0) \ - M(Bool, split_intersecting_parts_ranges_into_layers_final, true, R"( + DECLARE(Bool, split_intersecting_parts_ranges_into_layers_final, true, R"( Split intersecting parts ranges into layers during FINAL optimization )", 0) \ \ - M(UInt64, mysql_max_rows_to_insert, 65536, R"( + DECLARE(UInt64, mysql_max_rows_to_insert, 65536, R"( The maximum number of rows in MySQL batch insertion of the MySQL storage engine )", 0) \ - M(Bool, mysql_map_string_to_text_in_show_columns, true, R"( + DECLARE(Bool, mysql_map_string_to_text_in_show_columns, true, R"( When enabled, [String](../../sql-reference/data-types/string.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns). Has an effect only when the connection is made through the MySQL wire protocol. @@ -1281,7 +1281,7 @@ Has an effect only when the connection is made through the MySQL wire protocol. - 0 - Use `BLOB`. - 1 - Use `TEXT`. )", 0) \ - M(Bool, mysql_map_fixed_string_to_text_in_show_columns, true, R"( + DECLARE(Bool, mysql_map_fixed_string_to_text_in_show_columns, true, R"( When enabled, [FixedString](../../sql-reference/data-types/fixedstring.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns). Has an effect only when the connection is made through the MySQL wire protocol. 
@@ -1290,14 +1290,14 @@ Has an effect only when the connection is made through the MySQL wire protocol. - 1 - Use `TEXT`. )", 0) \ \ - M(UInt64, optimize_min_equality_disjunction_chain_length, 3, R"( + DECLARE(UInt64, optimize_min_equality_disjunction_chain_length, 3, R"( The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization )", 0) \ - M(UInt64, optimize_min_inequality_conjunction_chain_length, 3, R"( + DECLARE(UInt64, optimize_min_inequality_conjunction_chain_length, 3, R"( The minimum length of the expression `expr <> x1 AND ... expr <> xN` for optimization )", 0) \ \ - M(UInt64, min_bytes_to_use_direct_io, 0, R"( + DECLARE(UInt64, min_bytes_to_use_direct_io, 0, R"( The minimum data volume required for using direct I/O access to the storage disk. ClickHouse uses this setting when reading data from tables. If the total storage volume of all the data to be read exceeds `min_bytes_to_use_direct_io` bytes, then ClickHouse reads the data from the storage disk with the `O_DIRECT` option. @@ -1307,7 +1307,7 @@ Possible values: - 0 — Direct I/O is disabled. - Positive integer. )", 0) \ - M(UInt64, min_bytes_to_use_mmap_io, 0, R"( + DECLARE(UInt64, min_bytes_to_use_mmap_io, 0, R"( This is an experimental setting. Sets the minimum amount of memory for reading large files without copying data from the kernel to userspace. Recommended threshold is about 64 MB, because [mmap/munmap](https://en.wikipedia.org/wiki/Mmap) is slow. It makes sense only for large files and helps only if data reside in the page cache. Possible values: @@ -1315,25 +1315,25 @@ Possible values: - Positive integer. - 0 — Big files read with only copying data from kernel to userspace. )", 0) \ - M(Bool, checksum_on_read, true, R"( + DECLARE(Bool, checksum_on_read, true, R"( Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. 
It may only be used for experiments and benchmarks. The setting is only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over the network. )", 0) \ \ - M(Bool, force_index_by_date, false, R"( + DECLARE(Bool, force_index_by_date, false, R"( Disables query execution if the index can’t be used by date. Works with tables in the MergeTree family. If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). )", 0) \ - M(Bool, force_primary_key, false, R"( + DECLARE(Bool, force_primary_key, false, R"( Disables query execution if indexing by the primary key is not possible. Works with tables in the MergeTree family. If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). )", 0) \ - M(Bool, use_skip_indexes, true, R"( + DECLARE(Bool, use_skip_indexes, true, R"( Use data skipping indexes during query execution. Possible values: @@ -1341,7 +1341,7 @@ Possible values: - 0 — Disabled. - 1 — Enabled. 
)", 0) \ - M(Bool, use_skip_indexes_if_final, false, R"( + DECLARE(Bool, use_skip_indexes_if_final, false, R"( Controls whether skipping indexes are used when executing a query with the FINAL modifier. By default, this setting is disabled because skip indexes may exclude rows (granules) containing the latest data, which could lead to incorrect results. When enabled, skipping indexes are applied even with the FINAL modifier, potentially improving performance but with the risk of missing recent updates. @@ -1351,13 +1351,13 @@ Possible values: - 0 — Disabled. - 1 — Enabled. )", 0) \ - M(Bool, materialize_skip_indexes_on_insert, true, R"( + DECLARE(Bool, materialize_skip_indexes_on_insert, true, R"( If true skip indexes are calculated on inserts, otherwise skip indexes will be calculated only during merges )", 0) \ - M(Bool, materialize_statistics_on_insert, true, R"( + DECLARE(Bool, materialize_statistics_on_insert, true, R"( If true statistics are calculated on inserts, otherwise statistics will be calculated only during merges )", 0) \ - M(String, ignore_data_skipping_indices, "", R"( + DECLARE(String, ignore_data_skipping_indices, "", R"( Ignores the skipping indexes specified if used by the query. Consider the following example: @@ -1442,7 +1442,7 @@ Expression ((Projection + Before ORDER BY)) Works with tables in the MergeTree family. )", 0) \ \ - M(String, force_data_skipping_indices, "", R"( + DECLARE(String, force_data_skipping_indices, "", R"( Disables query execution if passed data skipping indices wasn't used. Consider the following example: @@ -1469,14 +1469,14 @@ SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS fo ``` )", 0) \ \ - M(Float, max_streams_to_max_threads_ratio, 1, R"( + DECLARE(Float, max_streams_to_max_threads_ratio, 1, R"( Allows you to use more sources than the number of threads - to more evenly distribute work across threads. 
It is assumed that this is a temporary solution since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself. )", 0) \ - M(Float, max_streams_multiplier_for_merge_tables, 5, R"( + DECLARE(Float, max_streams_multiplier_for_merge_tables, 5, R"( Ask more streams when reading from Merge table. Streams will be spread across tables that Merge table will use. This allows more even distribution of work across threads and is especially helpful when merged tables differ in size. )", 0) \ \ - M(String, network_compression_method, "LZ4", R"( + DECLARE(String, network_compression_method, "LZ4", R"( Sets the method of data compression that is used for communication between servers and between server and [clickhouse-client](../../interfaces/cli.md). Possible values: @@ -1489,7 +1489,7 @@ Possible values: - [network_zstd_compression_level](#network_zstd_compression_level) )", 0) \ \ - M(Int64, network_zstd_compression_level, 1, R"( + DECLARE(Int64, network_zstd_compression_level, 1, R"( Adjusts the level of ZSTD compression. Used only when [network_compression_method](#network_compression_method) is set to `ZSTD`. Possible values: @@ -1497,14 +1497,14 @@ Possible values: - Positive integer from 1 to 15. )", 0) \ \ - M(Int64, zstd_window_log_max, 0, R"( + DECLARE(Int64, zstd_window_log_max, 0, R"( Allows you to select the max window log of ZSTD (it will not be used for MergeTree family) )", 0) \ \ - M(UInt64, priority, 0, R"( + DECLARE(UInt64, priority, 0, R"( Priority of the query. 1 - the highest, higher value - lower priority; 0 - do not use priorities. )", 0) \ - M(Int64, os_thread_priority, 0, R"( + DECLARE(Int64, os_thread_priority, 0, R"( Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core. 
:::note @@ -1518,7 +1518,7 @@ Possible values: Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive. )", 0) \ \ - M(Bool, log_queries, true, R"( + DECLARE(Bool, log_queries, true, R"( Setting up query logging. Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md/#query-log) server configuration parameter. @@ -1529,7 +1529,7 @@ Example: log_queries=1 ``` )", 0) \ - M(Bool, log_formatted_queries, false, R"( + DECLARE(Bool, log_formatted_queries, false, R"( Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)). Possible values: @@ -1537,7 +1537,7 @@ Possible values: - 0 — Formatted queries are not logged in the system table. - 1 — Formatted queries are logged in the system table. )", 0) \ - M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, R"( + DECLARE(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, R"( `query_log` minimal type to log. 
Possible values: @@ -1552,7 +1552,7 @@ Can be used to limit which entities will go to `query_log`, say you are interest log_queries_min_type='EXCEPTION_WHILE_PROCESSING' ``` )", 0) \ - M(Milliseconds, log_queries_min_query_duration_ms, 0, R"( + DECLARE(Milliseconds, log_queries_min_query_duration_ms, 0, R"( If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: - `system.query_log` @@ -1566,10 +1566,10 @@ Only the queries with the following type will get to the log: - Type: milliseconds - Default value: 0 (any query) )", 0) \ - M(UInt64, log_queries_cut_to_length, 100000, R"( + DECLARE(UInt64, log_queries_cut_to_length, 100000, R"( If query length is greater than a specified threshold (in bytes), then cut query when writing to query log. Also limit the length of printed query in ordinary text log. )", 0) \ - M(Float, log_queries_probability, 1., R"( + DECLARE(Float, log_queries_probability, 1., R"( Allows a user to write to [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), and [query_views_log](../../operations/system-tables/query_views_log.md) system tables only a sample of queries selected randomly with the specified probability. It helps to reduce the load with a large volume of queries in a second. Possible values: @@ -1579,7 +1579,7 @@ Possible values: - 1 — All queries are logged in the system tables. )", 0) \ \ - M(Bool, log_processors_profiles, true, R"( + DECLARE(Bool, log_processors_profiles, true, R"( Write time that processor spent during execution/waiting for data to `system.processors_profile_log` table. 
See also: @@ -1587,7 +1587,7 @@ See also: - [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md) - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) )", 0) \ - M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, R"( + DECLARE(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, R"( Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table. @@ -1607,7 +1607,7 @@ Possible values: - `allow` — Allows the use of these types of subqueries. )", IMPORTANT) \ \ - M(UInt64, max_concurrent_queries_for_all_users, 0, R"( + DECLARE(UInt64, max_concurrent_queries_for_all_users, 0, R"( Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries. Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded. @@ -1629,7 +1629,7 @@ Possible values: - [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries) )", 0) \ - M(UInt64, max_concurrent_queries_for_user, 0, R"( + DECLARE(UInt64, max_concurrent_queries_for_user, 0, R"( The maximum number of simultaneously processed queries per user. Possible values: @@ -1644,7 +1644,7 @@ Possible values: ``` )", 0) \ \ - M(Bool, insert_deduplicate, true, R"( + DECLARE(Bool, insert_deduplicate, true, R"( Enables or disables block deduplication of `INSERT` (for Replicated\* tables). 
Possible values: @@ -1656,11 +1656,11 @@ By default, blocks inserted into replicated tables by the `INSERT` statement are For the replicated tables by default the only 100 of the most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)). For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window). )", 0) \ - M(Bool, async_insert_deduplicate, false, R"( + DECLARE(Bool, async_insert_deduplicate, false, R"( For async INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed )", 0) \ \ - M(UInt64Auto, insert_quorum, 0, R"( + DECLARE(UInt64Auto, insert_quorum, 0, R"( :::note This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: @@ -1688,7 +1688,7 @@ See also: - [insert_quorum_parallel](#insert_quorum_parallel) - [select_sequential_consistency](#select_sequential_consistency) )", 0) \ - M(Milliseconds, insert_quorum_timeout, 600000, R"( + DECLARE(Milliseconds, insert_quorum_timeout, 600000, R"( Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. 
See also: @@ -1697,7 +1697,7 @@ See also: - [insert_quorum_parallel](#insert_quorum_parallel) - [select_sequential_consistency](#select_sequential_consistency) )", 0) \ - M(Bool, insert_quorum_parallel, true, R"( + DECLARE(Bool, insert_quorum_parallel, true, R"( :::note This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information. ::: @@ -1715,7 +1715,7 @@ See also: - [insert_quorum_timeout](#insert_quorum_timeout) - [select_sequential_consistency](#select_sequential_consistency) )", 0) \ - M(UInt64, select_sequential_consistency, 0, R"( + DECLARE(UInt64, select_sequential_consistency, 0, R"( :::note This setting differ in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree. ::: @@ -1739,38 +1739,38 @@ See also: - [insert_quorum_timeout](#insert_quorum_timeout) - [insert_quorum_parallel](#insert_quorum_parallel) )", 0) \ - M(UInt64, table_function_remote_max_addresses, 1000, R"( + DECLARE(UInt64, table_function_remote_max_addresses, 1000, R"( Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function. Possible values: - Positive integer. )", 0) \ - M(Milliseconds, read_backoff_min_latency_ms, 1000, R"( + DECLARE(Milliseconds, read_backoff_min_latency_ms, 1000, R"( Setting to reduce the number of threads in case of slow reads. Pay attention only to reads that took at least that much time. )", 0) \ - M(UInt64, read_backoff_max_throughput, 1048576, R"( + DECLARE(UInt64, read_backoff_max_throughput, 1048576, R"( Settings to reduce the number of threads in case of slow reads. Count events when the read bandwidth is less than that many bytes per second. 
)", 0) \ - M(Milliseconds, read_backoff_min_interval_between_events_ms, 1000, R"( + DECLARE(Milliseconds, read_backoff_min_interval_between_events_ms, 1000, R"( Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. )", 0) \ - M(UInt64, read_backoff_min_events, 2, R"( + DECLARE(UInt64, read_backoff_min_events, 2, R"( Settings to reduce the number of threads in case of slow reads. The number of events after which the number of threads will be reduced. )", 0) \ \ - M(UInt64, read_backoff_min_concurrency, 1, R"( + DECLARE(UInt64, read_backoff_min_concurrency, 1, R"( Settings to try keeping the minimal number of threads in case of slow reads. )", 0) \ \ - M(Float, memory_tracker_fault_probability, 0., R"( + DECLARE(Float, memory_tracker_fault_probability, 0., R"( For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability. )", 0) \ - M(Float, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability, 0.0, R"( + DECLARE(Float, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability, 0.0, R"( For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability. )", 0) \ \ - M(Bool, enable_http_compression, false, R"( + DECLARE(Bool, enable_http_compression, false, R"( Enables or disables data compression in the response to an HTTP request. For more information, read the [HTTP interface description](../../interfaces/http.md). @@ -1780,13 +1780,13 @@ Possible values: - 0 — Disabled. - 1 — Enabled. )", 0) \ - M(Int64, http_zlib_compression_level, 3, R"( + DECLARE(Int64, http_zlib_compression_level, 3, R"( Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression). 
Possible values: Numbers from 1 to 9. )", 0) \ \ - M(Bool, http_native_compression_disable_checksumming_on_decompress, false, R"( + DECLARE(Bool, http_native_compression_disable_checksumming_on_decompress, false, R"( Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`). For more information, read the [HTTP interface description](../../interfaces/http.md). @@ -1797,7 +1797,7 @@ Possible values: - 1 — Enabled. )", 0) \ \ - M(String, count_distinct_implementation, "uniqExact", R"( + DECLARE(String, count_distinct_implementation, "uniqExact", R"( Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. Possible values: @@ -1809,19 +1809,19 @@ Possible values: - [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact) )", 0) \ \ - M(Bool, add_http_cors_header, false, R"( + DECLARE(Bool, add_http_cors_header, false, R"( Write add http CORS header. )", 0) \ \ - M(UInt64, max_http_get_redirects, 0, R"( + DECLARE(UInt64, max_http_get_redirects, 0, R"( Max number of HTTP GET redirects hops allowed. Ensures additional security measures are in place to prevent a malicious server from redirecting your requests to unexpected services.\n\nIt is the case when an external server redirects to another address, but that address appears to be internal to the company's infrastructure, and by sending an HTTP request to an internal server, you could request an internal API from the internal network, bypassing the auth, or even query other services, such as Redis or Memcached. When you don't have an internal infrastructure (including something running on your localhost), or you trust the server, it is safe to allow redirects. 
Although keep in mind, that if the URL uses HTTP instead of HTTPS, and you will have to trust not only the remote server but also your ISP and every network in the middle. )", 0) \ \ - M(Bool, use_client_time_zone, false, R"( + DECLARE(Bool, use_client_time_zone, false, R"( Use client timezone for interpreting DateTime string values, instead of adopting server timezone. )", 0) \ \ - M(Bool, send_progress_in_http_headers, false, R"( + DECLARE(Bool, send_progress_in_http_headers, false, R"( Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses. For more information, read the [HTTP interface description](../../interfaces/http.md). @@ -1832,26 +1832,26 @@ Possible values: - 1 — Enabled. )", 0) \ \ - M(UInt64, http_headers_progress_interval_ms, 100, R"( + DECLARE(UInt64, http_headers_progress_interval_ms, 100, R"( Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval. )", 0) \ - M(Bool, http_wait_end_of_query, false, R"( + DECLARE(Bool, http_wait_end_of_query, false, R"( Enable HTTP response buffering on the server-side. )", 0) \ - M(Bool, http_write_exception_in_output_format, true, R"( + DECLARE(Bool, http_write_exception_in_output_format, true, R"( Write exception in output format to produce valid output. Works with JSON and XML formats. )", 0) \ - M(UInt64, http_response_buffer_size, 0, R"( + DECLARE(UInt64, http_response_buffer_size, 0, R"( The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled). )", 0) \ \ - M(Bool, fsync_metadata, true, R"( + DECLARE(Bool, fsync_metadata, true, R"( Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html) when writing `.sql` files. Enabled by default. It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed. 
)", 0) \ \ - M(Bool, join_use_nulls, false, R"( + DECLARE(Bool, join_use_nulls, false, R"( Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting. Possible values: @@ -1860,10 +1860,10 @@ Possible values: - 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md). )", IMPORTANT) \ \ - M(UInt64, join_output_by_rowlist_perkey_rows_threshold, 5, R"( + DECLARE(UInt64, join_output_by_rowlist_perkey_rows_threshold, 5, R"( The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join. )", 0) \ - M(JoinStrictness, join_default_strictness, JoinStrictness::All, R"( + DECLARE(JoinStrictness, join_default_strictness, JoinStrictness::All, R"( Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join). Possible values: @@ -1873,7 +1873,7 @@ Possible values: - `ASOF` — For joining sequences with an uncertain match. - `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception. )", 0) \ - M(Bool, any_join_distinct_right_table_keys, false, R"( + DECLARE(Bool, any_join_distinct_right_table_keys, false, R"( Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations. 
:::note @@ -1899,15 +1899,15 @@ See also: - [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings) )", IMPORTANT) \ - M(Bool, single_join_prefer_left_table, true, R"( + DECLARE(Bool, single_join_prefer_left_table, true, R"( For single JOIN in case of identifier ambiguity prefer left table )", IMPORTANT) \ \ - M(UInt64, preferred_block_size_bytes, 1000000, R"( + DECLARE(UInt64, preferred_block_size_bytes, 1000000, R"( This setting adjusts the data block size for query processing and represents additional fine-tuning to the more rough 'max_block_size' setting. If the columns are large and with 'max_block_size' rows the block size is likely to be larger than the specified amount of bytes, its size will be lowered for better CPU cache locality. )", 0) \ \ - M(UInt64, max_replica_delay_for_distributed_queries, 300, R"( + DECLARE(UInt64, max_replica_delay_for_distributed_queries, 300, R"( Disables lagging replicas for distributed queries. See [Replication](../../engines/table-engines/mergetree-family/replication.md). Sets the time in seconds. If a replica's lag is greater than or equal to the set value, this replica is not used. @@ -1921,7 +1921,7 @@ To prevent the use of any replica with a non-zero lag, set this parameter to 1. Used when performing `SELECT` from a distributed table that points to replicated tables. )", 0) \ - M(Bool, fallback_to_stale_replicas_for_distributed_queries, true, R"( + DECLARE(Bool, fallback_to_stale_replicas_for_distributed_queries, true, R"( Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../engines/table-engines/mergetree-family/replication.md). ClickHouse selects the most relevant from the outdated replicas of the table. @@ -1930,23 +1930,23 @@ Used when performing `SELECT` from a distributed table that points to replicated By default, 1 (enabled). 
)", 0) \ - M(UInt64, preferred_max_column_in_block_size_bytes, 0, R"( + DECLARE(UInt64, preferred_max_column_in_block_size_bytes, 0, R"( Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size. )", 0) \ \ - M(UInt64, parts_to_delay_insert, 0, R"( + DECLARE(UInt64, parts_to_delay_insert, 0, R"( If the destination table contains at least that many active parts in a single partition, artificially slow down insert into table. )", 0) \ - M(UInt64, parts_to_throw_insert, 0, R"( + DECLARE(UInt64, parts_to_throw_insert, 0, R"( If more than this number active parts in a single partition of the destination table, throw 'Too many parts ...' exception. )", 0) \ - M(UInt64, number_of_mutations_to_delay, 0, R"( + DECLARE(UInt64, number_of_mutations_to_delay, 0, R"( If the mutated table contains at least that many unfinished mutations, artificially slow down mutations of table. 0 - disabled )", 0) \ - M(UInt64, number_of_mutations_to_throw, 0, R"( + DECLARE(UInt64, number_of_mutations_to_throw, 0, R"( If the mutated table contains at least that many unfinished mutations, throw 'Too many mutations ...' exception. 0 - disabled )", 0) \ - M(Int64, distributed_ddl_task_timeout, 180, R"( + DECLARE(Int64, distributed_ddl_task_timeout, 180, R"( Sets timeout for DDL query responses from all hosts in cluster. If a DDL request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite. Possible values: @@ -1955,24 +1955,24 @@ Possible values: - 0 — Async mode. - Negative integer — infinite timeout. )", 0) \ - M(Milliseconds, stream_flush_interval_ms, 7500, R"( + DECLARE(Milliseconds, stream_flush_interval_ms, 7500, R"( Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#max_insert_block_size) rows. The default value is 7500. 
The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance. )", 0) \ - M(Milliseconds, stream_poll_timeout_ms, 500, R"( + DECLARE(Milliseconds, stream_poll_timeout_ms, 500, R"( Timeout for polling data from/to streaming storages. )", 0) \ - M(UInt64, min_free_disk_bytes_to_perform_insert, 0, R"( + DECLARE(UInt64, min_free_disk_bytes_to_perform_insert, 0, R"( Minimum free disk space bytes to perform an insert. )", 0) \ - M(Float, min_free_disk_ratio_to_perform_insert, 0.0, R"( + DECLARE(Float, min_free_disk_ratio_to_perform_insert, 0.0, R"( Minimum free disk space ratio to perform an insert. )", 0) \ \ - M(Bool, final, false, R"( + DECLARE(Bool, final, false, R"( Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and distributed tables. @@ -2016,37 +2016,37 @@ SELECT * FROM test; ``` )", 0) \ \ - M(Bool, partial_result_on_first_cancel, false, R"( + DECLARE(Bool, partial_result_on_first_cancel, false, R"( Allows query to return a partial result after cancel. )", 0) \ \ - M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, R"( + DECLARE(Bool, ignore_on_cluster_for_replicated_udf_queries, false, R"( Ignore ON CLUSTER clause for replicated UDF management queries. )", 0) \ - M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, R"( + DECLARE(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, R"( Ignore ON CLUSTER clause for replicated access entities management queries. )", 0) \ - M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, R"( + DECLARE(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, R"( Ignore ON CLUSTER clause for replicated named collections management queries. 
)", 0) \ /** Settings for testing hedged requests */ \ - M(Milliseconds, sleep_in_send_tables_status_ms, 0, R"( + DECLARE(Milliseconds, sleep_in_send_tables_status_ms, 0, R"( Time to sleep in sending tables status response in TCPHandler )", 0) \ - M(Milliseconds, sleep_in_send_data_ms, 0, R"( + DECLARE(Milliseconds, sleep_in_send_data_ms, 0, R"( Time to sleep in sending data in TCPHandler )", 0) \ - M(Milliseconds, sleep_after_receiving_query_ms, 0, R"( + DECLARE(Milliseconds, sleep_after_receiving_query_ms, 0, R"( Time to sleep after receiving query in TCPHandler )", 0) \ - M(UInt64, unknown_packet_in_send_data, 0, R"( + DECLARE(UInt64, unknown_packet_in_send_data, 0, R"( Send unknown packet instead of data Nth data packet )", 0) \ \ - M(Bool, insert_allow_materialized_columns, false, R"( + DECLARE(Bool, insert_allow_materialized_columns, false, R"( If setting is enabled, Allow materialized columns in INSERT. )", 0) \ - M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, R"( + DECLARE(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, R"( HTTP connection timeout (in seconds). Possible values: @@ -2054,7 +2054,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). )", 0) \ - M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, R"( + DECLARE(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, R"( HTTP send timeout (in seconds). Possible values: @@ -2066,7 +2066,7 @@ Possible values: It's applicable only to the default profile. A server reboot is required for the changes to take effect. ::: )", 0) \ - M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, R"( + DECLARE(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, R"( HTTP receive timeout (in seconds). Possible values: @@ -2074,29 +2074,29 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). 
)", 0) \ - M(UInt64, http_max_uri_size, 1048576, R"( + DECLARE(UInt64, http_max_uri_size, 1048576, R"( Sets the maximum URI length of an HTTP request. Possible values: - Positive integer. )", 0) \ - M(UInt64, http_max_fields, 1000000, R"( + DECLARE(UInt64, http_max_fields, 1000000, R"( Maximum number of fields in HTTP header )", 0) \ - M(UInt64, http_max_field_name_size, 128 * 1024, R"( + DECLARE(UInt64, http_max_field_name_size, 128 * 1024, R"( Maximum length of field name in HTTP header )", 0) \ - M(UInt64, http_max_field_value_size, 128 * 1024, R"( + DECLARE(UInt64, http_max_field_value_size, 128 * 1024, R"( Maximum length of field value in HTTP header )", 0) \ - M(Bool, http_skip_not_found_url_for_globs, true, R"( + DECLARE(Bool, http_skip_not_found_url_for_globs, true, R"( Skip URLs for globs with HTTP_NOT_FOUND error )", 0) \ - M(Bool, http_make_head_request, true, R"( + DECLARE(Bool, http_make_head_request, true, R"( The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests. )", 0) \ - M(Bool, optimize_throw_if_noop, false, R"( + DECLARE(Bool, optimize_throw_if_noop, false, R"( Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didn’t perform a merge. By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message. @@ -2106,47 +2106,34 @@ Possible values: - 1 — Throwing an exception is enabled. - 0 — Throwing an exception is disabled. 
)", 0) \ - M(Bool, use_index_for_in_with_subqueries, true, R"( + DECLARE(Bool, use_index_for_in_with_subqueries, true, R"( Try using an index if there is a subquery or a table expression on the right side of the IN operator. )", 0) \ - M(UInt64, use_index_for_in_with_subqueries_max_values, 0, R"( + DECLARE(UInt64, use_index_for_in_with_subqueries_max_values, 0, R"( The maximum size of the set in the right-hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to the preparation of additional data structures for large queries. Zero means no limit. )", 0) \ - M(Bool, analyze_index_with_space_filling_curves, true, R"( + DECLARE(Bool, analyze_index_with_space_filling_curves, true, R"( If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)` or `ORDER BY hilbertEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis. )", 0) \ - M(Bool, joined_subquery_requires_alias, true, R"( + DECLARE(Bool, joined_subquery_requires_alias, true, R"( Force joined subqueries and table functions to have aliases for correct name qualification. )", 0) \ - M(Bool, empty_result_for_aggregation_by_empty_set, false, R"( + DECLARE(Bool, empty_result_for_aggregation_by_empty_set, false, R"( Return empty result when aggregating without keys on empty set. )", 0) \ - M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, R"( + DECLARE(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, R"( Return empty result when aggregating by constant keys on empty set. )", 0) \ - M(Bool, allow_distributed_ddl, true, R"( + DECLARE(Bool, allow_distributed_ddl, true, R"( If it is set to true, then a user is allowed to executed distributed DDL queries. 
)", 0) \ - M(Bool, allow_suspicious_codecs, false, R"( + DECLARE(Bool, allow_suspicious_codecs, false, R"( If it is set to true, allow to specify meaningless compression codecs. )", 0) \ - M(Bool, enable_deflate_qpl_codec, false, R"( -If turned on, the DEFLATE_QPL codec may be used to compress columns. - -Possible values: - -- 0 - Disabled -- 1 - Enabled -)", 0) \ - M(Bool, enable_zstd_qat_codec, false, R"( + DECLARE(Bool, enable_zstd_qat_codec, false, R"( If turned on, the ZSTD_QAT codec may be used to compress columns. - -Possible values: - -- 0 - Disabled -- 1 - Enabled )", 0) \ - M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, R"( + DECLARE(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, R"( Sets the period for a real clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Real clock timer counts wall-clock time. Possible values: @@ -2166,7 +2153,7 @@ See also: - System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) )", 0) \ - M(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, R"( + DECLARE(UInt64, query_profiler_cpu_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, R"( Sets the period for a CPU clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). This timer counts only CPU time. Possible values: @@ -2186,13 +2173,13 @@ See also: - System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) )", 0) \ - M(Bool, metrics_perf_events_enabled, false, R"( + DECLARE(Bool, metrics_perf_events_enabled, false, R"( If enabled, some of the perf events will be measured throughout queries' execution. )", 0) \ - M(String, metrics_perf_events_list, "", R"( + DECLARE(String, metrics_perf_events_list, "", R"( Comma separated list of perf metrics that will be measured throughout queries' execution. 
Empty means all events. See PerfEventInfo in sources for the available events. )", 0) \ - M(Float, opentelemetry_start_trace_probability, 0., R"( + DECLARE(Float, opentelemetry_start_trace_probability, 0., R"( Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). Possible values: @@ -2201,10 +2188,10 @@ Possible values: - Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. - 1 — The trace for all executed queries is enabled. )", 0) \ - M(Bool, opentelemetry_trace_processors, false, R"( + DECLARE(Bool, opentelemetry_trace_processors, false, R"( Collect OpenTelemetry spans for processors. )", 0) \ - M(Bool, prefer_column_name_to_alias, false, R"( + DECLARE(Bool, prefer_column_name_to_alias, false, R"( Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when alias is the same as the column name, see [Expression Aliases](../../sql-reference/syntax.md/#notes-on-usage). Enable this setting to make aliases syntax rules in ClickHouse more compatible with most other database engines. Possible values: @@ -2246,7 +2233,7 @@ Result: ``` )", 0) \ \ - M(Bool, prefer_global_in_and_join, false, R"( + DECLARE(Bool, prefer_global_in_and_join, false, R"( Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`. 
Possible values: @@ -2266,7 +2253,7 @@ Another use case of `prefer_global_in_and_join` is accessing tables created by - [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN` )", 0) \ - M(Bool, enable_vertical_final, true, R"( + DECLARE(Bool, enable_vertical_final, true, R"( If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows )", 0) \ \ @@ -2278,155 +2265,155 @@ If enable, remove duplicated rows during FINAL by marking rows as deleted and fi * Almost all limits apply to each stream individually. \ */ \ \ - M(UInt64, max_rows_to_read, 0, R"( + DECLARE(UInt64, max_rows_to_read, 0, R"( Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server. )", 0) \ - M(UInt64, max_bytes_to_read, 0, R"( + DECLARE(UInt64, max_bytes_to_read, 0, R"( Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server. )", 0) \ - M(OverflowMode, read_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, read_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ \ - M(UInt64, max_rows_to_read_leaf, 0, R"( + DECLARE(UInt64, max_rows_to_read_leaf, 0, R"( Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1. )", 0) \ - M(UInt64, max_bytes_to_read_leaf, 0, R"( + DECLARE(UInt64, max_bytes_to_read_leaf, 0, R"( Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. 
Note, the setting is unstable with prefer_localhost_replica=1. )", 0) \ - M(OverflowMode, read_overflow_mode_leaf, OverflowMode::THROW, R"( + DECLARE(OverflowMode, read_overflow_mode_leaf, OverflowMode::THROW, R"( What to do when the leaf limit is exceeded. )", 0) \ \ - M(UInt64, max_rows_to_group_by, 0, R"( + DECLARE(UInt64, max_rows_to_group_by, 0, R"( If aggregation during GROUP BY is generating more than the specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode. )", 0) \ - M(OverflowModeGroupBy, group_by_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowModeGroupBy, group_by_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ - M(UInt64, max_bytes_before_external_group_by, 0, R"( + DECLARE(UInt64, max_bytes_before_external_group_by, 0, R"( If memory usage during GROUP BY operation is exceeding this threshold in bytes, activate the 'external aggregation' mode (spill data to disk). Recommended value is half of the available system memory. )", 0) \ \ - M(UInt64, max_rows_to_sort, 0, R"( + DECLARE(UInt64, max_rows_to_sort, 0, R"( If more than the specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception )", 0) \ - M(UInt64, max_bytes_to_sort, 0, R"( + DECLARE(UInt64, max_bytes_to_sort, 0, R"( If more than the specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception )", 0) \ - M(OverflowMode, sort_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, sort_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. 
)", 0) \ - M(UInt64, prefer_external_sort_block_bytes, DEFAULT_BLOCK_SIZE * 256, R"( + DECLARE(UInt64, prefer_external_sort_block_bytes, DEFAULT_BLOCK_SIZE * 256, R"( Prefer maximum block bytes for external sort, reduce the memory usage during merging. )", 0) \ - M(UInt64, max_bytes_before_external_sort, 0, R"( + DECLARE(UInt64, max_bytes_before_external_sort, 0, R"( If memory usage during ORDER BY operation is exceeding this threshold in bytes, activate the 'external sorting' mode (spill data to disk). Recommended value is half of the available system memory. )", 0) \ - M(UInt64, max_bytes_before_remerge_sort, 1000000000, R"( + DECLARE(UInt64, max_bytes_before_remerge_sort, 1000000000, R"( In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows. )", 0) \ - M(Float, remerge_sort_lowered_memory_bytes_ratio, 2., R"( + DECLARE(Float, remerge_sort_lowered_memory_bytes_ratio, 2., R"( If memory usage after remerge does not reduced by this ratio, remerge will be disabled. )", 0) \ \ - M(UInt64, max_result_rows, 0, R"( + DECLARE(UInt64, max_result_rows, 0, R"( Limit on result size in rows. The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold. )", 0) \ - M(UInt64, max_result_bytes, 0, R"( + DECLARE(UInt64, max_result_bytes, 0, R"( Limit on result size in bytes (uncompressed). The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold. Caveats: the result size in memory is taken into account for this threshold. 
Even if the result size is small, it can reference larger data structures in memory, representing dictionaries of LowCardinality columns, and Arenas of AggregateFunction columns, so the threshold can be exceeded despite the small result size. The setting is fairly low level and should be used with caution. )", 0) \ - M(OverflowMode, result_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, result_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ \ /* TODO: Check also when merging and finalizing aggregate functions. */ \ - M(Seconds, max_execution_time, 0, R"( + DECLARE(Seconds, max_execution_time, 0, R"( If query runtime exceeds the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode', which by default is - throw an exception. Note that the timeout is checked and the query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting. )", 0) \ - M(OverflowMode, timeout_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, timeout_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ - M(Seconds, max_execution_time_leaf, 0, R"( + DECLARE(Seconds, max_execution_time_leaf, 0, R"( Similar semantic to max_execution_time but only apply on leaf node for distributed queries, the time out behavior will be determined by 'timeout_overflow_mode_leaf' which by default is - throw an exception )", 0) \ - M(OverflowMode, timeout_overflow_mode_leaf, OverflowMode::THROW, R"( + DECLARE(OverflowMode, timeout_overflow_mode_leaf, OverflowMode::THROW, R"( What to do when the leaf limit is exceeded. )", 0) \ \ - M(UInt64, min_execution_speed, 0, R"( + DECLARE(UInt64, min_execution_speed, 0, R"( Minimum number of execution rows per second. 
)", 0) \ - M(UInt64, max_execution_speed, 0, R"( + DECLARE(UInt64, max_execution_speed, 0, R"( Maximum number of execution rows per second. )", 0) \ - M(UInt64, min_execution_speed_bytes, 0, R"( + DECLARE(UInt64, min_execution_speed_bytes, 0, R"( Minimum number of execution bytes per second. )", 0) \ - M(UInt64, max_execution_speed_bytes, 0, R"( + DECLARE(UInt64, max_execution_speed_bytes, 0, R"( Maximum number of execution bytes per second. )", 0) \ - M(Seconds, timeout_before_checking_execution_speed, 10, R"( + DECLARE(Seconds, timeout_before_checking_execution_speed, 10, R"( Check that the speed is not too low after the specified time has elapsed. )", 0) \ - M(Seconds, max_estimated_execution_time, 0, R"( + DECLARE(Seconds, max_estimated_execution_time, 0, R"( Maximum query estimate execution time in seconds. )", 0) \ \ - M(UInt64, max_columns_to_read, 0, R"( + DECLARE(UInt64, max_columns_to_read, 0, R"( If a query requires reading more than specified number of columns, exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries. )", 0) \ - M(UInt64, max_temporary_columns, 0, R"( + DECLARE(UInt64, max_temporary_columns, 0, R"( If a query generates more than the specified number of temporary columns in memory as a result of intermediate calculation, the exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries. )", 0) \ - M(UInt64, max_temporary_non_const_columns, 0, R"( + DECLARE(UInt64, max_temporary_non_const_columns, 0, R"( Similar to the 'max_temporary_columns' setting but applies only to non-constant columns. This makes sense because constant columns are cheap and it is reasonable to allow more of them. )", 0) \ \ - M(UInt64, max_sessions_for_user, 0, R"( + DECLARE(UInt64, max_sessions_for_user, 0, R"( Maximum number of simultaneous sessions for a user. 
)", 0) \ \ - M(UInt64, max_subquery_depth, 100, R"( + DECLARE(UInt64, max_subquery_depth, 100, R"( If a query has more than the specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries. )", 0) \ - M(UInt64, max_analyze_depth, 5000, R"( + DECLARE(UInt64, max_analyze_depth, 5000, R"( Maximum number of analyses performed by interpreter. )", 0) \ - M(UInt64, max_ast_depth, 1000, R"( + DECLARE(UInt64, max_ast_depth, 1000, R"( Maximum depth of query syntax tree. Checked after parsing. )", 0) \ - M(UInt64, max_ast_elements, 50000, R"( + DECLARE(UInt64, max_ast_elements, 50000, R"( Maximum size of query syntax tree in number of nodes. Checked after parsing. )", 0) \ - M(UInt64, max_expanded_ast_elements, 500000, R"( + DECLARE(UInt64, max_expanded_ast_elements, 500000, R"( Maximum size of query syntax tree in number of nodes after expansion of aliases and the asterisk. )", 0) \ \ - M(UInt64, readonly, 0, R"( + DECLARE(UInt64, readonly, 0, R"( 0 - no read-only restrictions. 1 - only read requests, as well as changing explicitly allowed settings. 2 - only read requests, as well as changing settings, except for the 'readonly' setting. )", 0) \ \ - M(UInt64, max_rows_in_set, 0, R"( + DECLARE(UInt64, max_rows_in_set, 0, R"( Maximum size of the set (in number of elements) resulting from the execution of the IN section. )", 0) \ - M(UInt64, max_bytes_in_set, 0, R"( + DECLARE(UInt64, max_bytes_in_set, 0, R"( Maximum size of the set (in bytes in memory) resulting from the execution of the IN section. )", 0) \ - M(OverflowMode, set_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, set_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ \ - M(UInt64, max_rows_in_join, 0, R"( + DECLARE(UInt64, max_rows_in_join, 0, R"( Maximum size of the hash table for JOIN (in number of rows). 
)", 0) \ - M(UInt64, max_bytes_in_join, 0, R"( + DECLARE(UInt64, max_bytes_in_join, 0, R"( Maximum size of the hash table for JOIN (in number of bytes in memory). )", 0) \ - M(OverflowMode, join_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, join_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ - M(Bool, join_any_take_last_row, false, R"( + DECLARE(Bool, join_any_take_last_row, false, R"( Changes the behaviour of join operations with `ANY` strictness. :::note @@ -2444,7 +2431,7 @@ See also: - [Join table engine](../../engines/table-engines/special/join.md) - [join_default_strictness](#join_default_strictness) )", IMPORTANT) \ - M(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, R"( + DECLARE(JoinAlgorithm, join_algorithm, JoinAlgorithm::DEFAULT, R"( Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used. Several algorithms can be specified, and an available one would be chosen for a particular query based on kind/strictness and table engine. @@ -2499,19 +2486,19 @@ Possible values: ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. )", 0) \ - M(UInt64, cross_join_min_rows_to_compress, 10000000, R"( + DECLARE(UInt64, cross_join_min_rows_to_compress, 10000000, R"( Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. )", 0) \ - M(UInt64, cross_join_min_bytes_to_compress, 1_GiB, R"( + DECLARE(UInt64, cross_join_min_bytes_to_compress, 1_GiB, R"( Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. 
)", 0) \ - M(UInt64, default_max_bytes_in_join, 1000000000, R"( + DECLARE(UInt64, default_max_bytes_in_join, 1000000000, R"( Maximum size of right-side table if limit is required but max_bytes_in_join is not set. )", 0) \ - M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, R"( + DECLARE(UInt64, partial_merge_join_left_table_buffer_bytes, 0, R"( If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread. )", 0) \ - M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, R"( + DECLARE(UInt64, partial_merge_join_rows_in_right_blocks, 65536, R"( Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries. ClickHouse server: @@ -2524,7 +2511,7 @@ Possible values: - Any positive integer. Recommended range of values: \[1000, 100000\]. )", 0) \ - M(UInt64, join_on_disk_max_files_to_merge, 64, R"( + DECLARE(UInt64, join_on_disk_max_files_to_merge, 64, R"( Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk. The bigger the value of the setting, the more RAM is used and the less disk I/O is needed. @@ -2533,7 +2520,7 @@ Possible values: - Any positive integer, starting from 2. )", 0) \ - M(UInt64, max_rows_in_set_to_optimize_join, 0, R"( + DECLARE(UInt64, max_rows_in_set_to_optimize_join, 0, R"( Maximal size of the set to filter joined tables by each other's row sets before joining. Possible values: @@ -2542,11 +2529,11 @@ Possible values: - Any positive integer. 
)", 0) \ \ - M(Bool, compatibility_ignore_collation_in_create_table, true, R"( + DECLARE(Bool, compatibility_ignore_collation_in_create_table, true, R"( Compatibility ignore collation in create table )", 0) \ \ - M(String, temporary_files_codec, "LZ4", R"( + DECLARE(String, temporary_files_codec, "LZ4", R"( Sets compression codec for temporary files used in sorting and joining operations on disk. Possible values: @@ -2555,48 +2542,48 @@ Possible values: - NONE — No compression is applied. )", 0) \ \ - M(UInt64, max_rows_to_transfer, 0, R"( + DECLARE(UInt64, max_rows_to_transfer, 0, R"( Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed. )", 0) \ - M(UInt64, max_bytes_to_transfer, 0, R"( + DECLARE(UInt64, max_bytes_to_transfer, 0, R"( Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed. )", 0) \ - M(OverflowMode, transfer_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, transfer_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ \ - M(UInt64, max_rows_in_distinct, 0, R"( + DECLARE(UInt64, max_rows_in_distinct, 0, R"( Maximum number of elements during execution of DISTINCT. )", 0) \ - M(UInt64, max_bytes_in_distinct, 0, R"( + DECLARE(UInt64, max_bytes_in_distinct, 0, R"( Maximum total size of the state (in uncompressed bytes) in memory for the execution of DISTINCT. )", 0) \ - M(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, R"( + DECLARE(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, R"( What to do when the limit is exceeded. )", 0) \ \ - M(UInt64, max_memory_usage, 0, R"( + DECLARE(UInt64, max_memory_usage, 0, R"( Maximum memory usage for processing of single query. Zero means unlimited. 
)", 0) \ - M(UInt64, memory_overcommit_ratio_denominator, 1_GiB, R"( + DECLARE(UInt64, memory_overcommit_ratio_denominator, 1_GiB, R"( It represents the soft memory limit when the hard limit is reached on the global level. This value is used to compute the overcommit ratio for the query. Zero means skip the query. Read more about [memory overcommit](memory-overcommit.md). )", 0) \ - M(UInt64, max_memory_usage_for_user, 0, R"( + DECLARE(UInt64, max_memory_usage_for_user, 0, R"( Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited. )", 0) \ - M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, R"( + DECLARE(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, R"( It represents the soft memory limit when the hard limit is reached on the user level. This value is used to compute the overcommit ratio for the query. Zero means skip the query. Read more about [memory overcommit](memory-overcommit.md). )", 0) \ - M(UInt64, max_untracked_memory, (4 * 1024 * 1024), R"( + DECLARE(UInt64, max_untracked_memory, (4 * 1024 * 1024), R"( Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when an amount (in absolute value) becomes larger than the specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'. )", 0) \ - M(UInt64, memory_profiler_step, (4 * 1024 * 1024), R"( + DECLARE(UInt64, memory_profiler_step, (4 * 1024 * 1024), R"( Sets the step of memory profiler. Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stacktrace and will write it into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log). Possible values: @@ -2605,16 +2592,16 @@ Possible values: - 0 for turning off the memory profiler. 
)", 0) \ - M(Float, memory_profiler_sample_probability, 0., R"( + DECLARE(Float, memory_profiler_sample_probability, 0., R"( Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless of the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine-grained sampling. )", 0) \ - M(UInt64, memory_profiler_sample_min_allocation_size, 0, R"( + DECLARE(UInt64, memory_profiler_sample_min_allocation_size, 0, R"( Collect random allocations of size greater or equal than the specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold work as expected. )", 0) \ - M(UInt64, memory_profiler_sample_max_allocation_size, 0, R"( + DECLARE(UInt64, memory_profiler_sample_max_allocation_size, 0, R"( Collect random allocations of size less or equal than the specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold work as expected. )", 0) \ - M(Bool, trace_profile_events, false, R"( + DECLARE(Bool, trace_profile_events, false, R"( Enables or disables collecting stacktraces on each update of profile events along with the name of profile event and the value of increment and sending them into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log). Possible values: @@ -2623,13 +2610,13 @@ Possible values: - 0 — Tracing of profile events disabled. 
)", 0) \ \ - M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, R"( + DECLARE(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, R"( Maximum time thread will wait for memory to be freed in the case of memory overcommit on a user level. If the timeout is reached and memory is not freed, an exception is thrown. Read more about [memory overcommit](memory-overcommit.md). )", 0) \ \ - M(UInt64, max_network_bandwidth, 0, R"( + DECLARE(UInt64, max_network_bandwidth, 0, R"( Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query. Possible values: @@ -2637,7 +2624,7 @@ Possible values: - Positive integer. - 0 — Bandwidth control is disabled. )", 0) \ - M(UInt64, max_network_bytes, 0, R"( + DECLARE(UInt64, max_network_bytes, 0, R"( Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. Possible values: @@ -2645,7 +2632,7 @@ Possible values: - Positive integer. - 0 — Data volume control is disabled. )", 0) \ - M(UInt64, max_network_bandwidth_for_user, 0, R"( + DECLARE(UInt64, max_network_bandwidth_for_user, 0, R"( Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user. Possible values: @@ -2653,7 +2640,7 @@ Possible values: - Positive integer. - 0 — Control of the data speed is disabled. )", 0)\ - M(UInt64, max_network_bandwidth_for_all_users, 0, R"( + DECLARE(UInt64, max_network_bandwidth_for_all_users, 0, R"( Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server. Possible values: @@ -2662,51 +2649,51 @@ Possible values: - 0 — Control of the data speed is disabled. 
)", 0) \ \ - M(UInt64, max_temporary_data_on_disk_size_for_user, 0, R"( + DECLARE(UInt64, max_temporary_data_on_disk_size_for_user, 0, R"( The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited. )", 0)\ - M(UInt64, max_temporary_data_on_disk_size_for_query, 0, R"( + DECLARE(UInt64, max_temporary_data_on_disk_size_for_query, 0, R"( The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited. )", 0)\ \ - M(UInt64, backup_restore_keeper_max_retries, 20, R"( + DECLARE(UInt64, backup_restore_keeper_max_retries, 20, R"( Max retries for keeper operations during backup or restore )", 0) \ - M(UInt64, backup_restore_keeper_retry_initial_backoff_ms, 100, R"( + DECLARE(UInt64, backup_restore_keeper_retry_initial_backoff_ms, 100, R"( Initial backoff timeout for [Zoo]Keeper operations during backup or restore )", 0) \ - M(UInt64, backup_restore_keeper_retry_max_backoff_ms, 5000, R"( + DECLARE(UInt64, backup_restore_keeper_retry_max_backoff_ms, 5000, R"( Max backoff timeout for [Zoo]Keeper operations during backup or restore )", 0) \ - M(Float, backup_restore_keeper_fault_injection_probability, 0.0f, R"( + DECLARE(Float, backup_restore_keeper_fault_injection_probability, 0.0f, R"( Approximate probability of failure for a keeper request during backup or restore. 
Valid value is in interval [0.0f, 1.0f] )", 0) \ - M(UInt64, backup_restore_keeper_fault_injection_seed, 0, R"( + DECLARE(UInt64, backup_restore_keeper_fault_injection_seed, 0, R"( 0 - random seed, otherwise the setting value )", 0) \ - M(UInt64, backup_restore_keeper_value_max_size, 1048576, R"( + DECLARE(UInt64, backup_restore_keeper_value_max_size, 1048576, R"( Maximum size of data of a [Zoo]Keeper's node during backup )", 0) \ - M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, R"( + DECLARE(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, R"( Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore )", 0) \ - M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, R"( + DECLARE(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, R"( Maximum size of batch for multi request to [Zoo]Keeper during backup or restore )", 0) \ - M(UInt64, backup_restore_s3_retry_attempts, 1000, R"( + DECLARE(UInt64, backup_restore_s3_retry_attempts, 1000, R"( Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore. )", 0) \ - M(UInt64, max_backup_bandwidth, 0, R"( + DECLARE(UInt64, max_backup_bandwidth, 0, R"( The maximum read speed in bytes per second for particular backup on server. Zero means unlimited. )", 0) \ \ - M(Bool, log_profile_events, true, R"( + DECLARE(Bool, log_profile_events, true, R"( Log query performance statistics into the query_log, query_thread_log and query_views_log. )", 0) \ - M(Bool, log_query_settings, true, R"( + DECLARE(Bool, log_query_settings, true, R"( Log query settings into the query_log and OpenTelemetry span log. )", 0) \ - M(Bool, log_query_threads, false, R"( + DECLARE(Bool, log_query_threads, false, R"( Setting up query threads logging. Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. 
This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#query_thread_log) server configuration parameter. @@ -2722,7 +2709,7 @@ Possible values: log_query_threads=1 ``` )", 0) \ - M(Bool, log_query_views, true, R"( + DECLARE(Bool, log_query_views, true, R"( Setting up query views logging. When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#query_views_log) server configuration parameter. @@ -2733,7 +2720,7 @@ Example: log_query_views=1 ``` )", 0) \ - M(String, log_comment, "", R"( + DECLARE(String, log_comment, "", R"( Specifies the value for the `log_comment` field of the [system.query_log](../system-tables/query_log.md) table and comment text for the server log. It can be used to improve the readability of server logs. Additionally, it helps to select queries related to the test from the `system.query_log` after running [clickhouse-test](../../development/tests.md). @@ -2762,13 +2749,13 @@ Result: └─────────────┴───────────┘ ``` )", 0) \ - M(LogsLevel, send_logs_level, LogsLevel::fatal, R"( + DECLARE(LogsLevel, send_logs_level, LogsLevel::fatal, R"( Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none' )", 0) \ - M(String, send_logs_source_regexp, "", R"( + DECLARE(String, send_logs_source_regexp, "", R"( Send server text logs with specified regexp to match log source name. Empty means all sources. )", 0) \ - M(Bool, enable_optimize_predicate_expression, true, R"( + DECLARE(Bool, enable_optimize_predicate_expression, true, R"( Turns on predicate pushdown in `SELECT` queries. 
Predicate pushdown may significantly reduce network traffic for distributed queries. @@ -2789,21 +2776,21 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes. )", 0) \ - M(Bool, enable_optimize_predicate_expression_to_final_subquery, true, R"( + DECLARE(Bool, enable_optimize_predicate_expression_to_final_subquery, true, R"( Allow push predicate to final subquery. )", 0) \ - M(Bool, allow_push_predicate_when_subquery_contains_with, true, R"( + DECLARE(Bool, allow_push_predicate_when_subquery_contains_with, true, R"( Allows push predicate when subquery contains WITH clause )", 0) \ \ - M(UInt64, low_cardinality_max_dictionary_size, 8192, R"( + DECLARE(UInt64, low_cardinality_max_dictionary_size, 8192, R"( Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can’t be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method. Possible values: - Any positive integer. )", 0) \ - M(Bool, low_cardinality_use_single_dictionary_for_part, false, R"( + DECLARE(Bool, low_cardinality_use_single_dictionary_for_part, false, R"( Turns on or turns off using of single dictionary for the data part. By default, the ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`. @@ -2813,14 +2800,14 @@ Possible values: - 1 — Creating several dictionaries for the data part is prohibited. 
- 0 — Creating several dictionaries for the data part is not prohibited. )", 0) \ - M(Bool, decimal_check_overflow, true, R"( + DECLARE(Bool, decimal_check_overflow, true, R"( Check overflow of decimal arithmetic/comparison operations )", 0) \ - M(Bool, allow_custom_error_code_in_throwif, false, R"( + DECLARE(Bool, allow_custom_error_code_in_throwif, false, R"( Enable custom error code in function throwIf(). If true, thrown exceptions may have unexpected error codes. )", 0) \ \ - M(Bool, prefer_localhost_replica, true, R"( + DECLARE(Bool, prefer_localhost_replica, true, R"( Enables/disables preferable using the localhost replica when processing distributed queries. Possible values: @@ -2834,28 +2821,28 @@ If [parallel_replicas_custom_key](#parallel_replicas_custom_key) is set, disable If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. ::: )", 0) \ - M(UInt64, max_fetch_partition_retries_count, 5, R"( + DECLARE(UInt64, max_fetch_partition_retries_count, 5, R"( Amount of retries while fetching partition from another host. )", 0) \ - M(UInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, R"( + DECLARE(UInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, R"( Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in a user profile. Note that content is parsed and external tables are created in memory before the start of query execution. And this is the only limit that has an effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data). )", 0) \ - M(Bool, calculate_text_stack_trace, true, R"( + DECLARE(Bool, calculate_text_stack_trace, true, R"( Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when a huge amount of wrong queries are executed. 
In normal cases, you should not disable this option. )", 0) \ - M(Bool, enable_job_stack_trace, false, R"( + DECLARE(Bool, enable_job_stack_trace, false, R"( Output stack trace of a job creator when job results in exception )", 0) \ - M(Bool, allow_ddl, true, R"( + DECLARE(Bool, allow_ddl, true, R"( If it is set to true, then a user is allowed to executed DDL queries. )", 0) \ - M(Bool, parallel_view_processing, false, R"( + DECLARE(Bool, parallel_view_processing, false, R"( Enables pushing to attached views concurrently instead of sequentially. )", 0) \ - M(Bool, enable_unaligned_array_join, false, R"( + DECLARE(Bool, enable_unaligned_array_join, false, R"( Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one. )", 0) \ - M(Bool, optimize_read_in_order, true, R"( + DECLARE(Bool, optimize_read_in_order, true, R"( Enables [ORDER BY](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for reading data from [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. Possible values: @@ -2867,10 +2854,10 @@ Possible values: - [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) )", 0) \ - M(Bool, optimize_read_in_window_order, true, R"( + DECLARE(Bool, optimize_read_in_window_order, true, R"( Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables. )", 0) \ - M(Bool, optimize_aggregation_in_order, false, R"( + DECLARE(Bool, optimize_aggregation_in_order, false, R"( Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for aggregating data in corresponding order in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. 
Possible values: @@ -2882,16 +2869,16 @@ Possible values: - [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order) )", 0) \ - M(Bool, read_in_order_use_buffering, true, R"( + DECLARE(Bool, read_in_order_use_buffering, true, R"( Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution )", 0) \ - M(UInt64, aggregation_in_order_max_block_bytes, 50000000, R"( + DECLARE(UInt64, aggregation_in_order_max_block_bytes, 50000000, R"( Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation. )", 0) \ - M(UInt64, read_in_order_two_level_merge_threshold, 100, R"( + DECLARE(UInt64, read_in_order_two_level_merge_threshold, 100, R"( Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key. )", 0) \ - M(Bool, low_cardinality_allow_in_native_format, true, R"( + DECLARE(Bool, low_cardinality_allow_in_native_format, true, R"( Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md/#native) format. If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality`-columns to ordinary ones for `SELECT` queries, and convert ordinary columns to `LowCardinality`-columns for `INSERT` queries. @@ -2903,12 +2890,12 @@ Possible values: - 1 — Usage of `LowCardinality` is not restricted. - 0 — Usage of `LowCardinality` is restricted. )", 0) \ - M(Bool, cancel_http_readonly_queries_on_client_close, false, R"( + DECLARE(Bool, cancel_http_readonly_queries_on_client_close, false, R"( Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response. Cloud default value: `1`. 
)", 0) \ - M(Bool, external_table_functions_use_nulls, true, R"( + DECLARE(Bool, external_table_functions_use_nulls, true, R"( Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns. Possible values: @@ -2920,14 +2907,14 @@ Possible values: If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays. )", 0) \ - M(Bool, external_table_strict_query, false, R"( + DECLARE(Bool, external_table_strict_query, false, R"( If it is set to true, transforming expression to local filter is forbidden for queries to external tables. )", 0) \ \ - M(Bool, allow_hyperscan, true, R"( + DECLARE(Bool, allow_hyperscan, true, R"( Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage. )", 0) \ - M(UInt64, max_hyperscan_regexp_length, 0, R"( + DECLARE(UInt64, max_hyperscan_regexp_length, 0, R"( Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn). Possible values: @@ -2967,7 +2954,7 @@ Exception: Regexp length too large. - [max_hyperscan_regexp_total_length](#max-hyperscan-regexp-total-length) )", 0) \ - M(UInt64, max_hyperscan_regexp_total_length, 0, R"( + DECLARE(UInt64, max_hyperscan_regexp_total_length, 0, R"( Sets the maximum length total of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn). Possible values: @@ -3007,13 +2994,13 @@ Exception: Total regexp lengths too large. 
- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length) )", 0) \ - M(Bool, reject_expensive_hyperscan_regexps, true, R"( + DECLARE(Bool, reject_expensive_hyperscan_regexps, true, R"( Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion) )", 0) \ - M(Bool, allow_simdjson, true, R"( + DECLARE(Bool, allow_simdjson, true, R"( Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used. )", 0) \ - M(Bool, allow_introspection_functions, false, R"( + DECLARE(Bool, allow_introspection_functions, false, R"( Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling. Possible values: @@ -3026,7 +3013,7 @@ Possible values: - [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md) - System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log) )", 0) \ - M(Bool, splitby_max_substrings_includes_remaining_string, false, R"( + DECLARE(Bool, splitby_max_substrings_includes_remaining_string, false, R"( Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array. Possible values: @@ -3035,32 +3022,32 @@ Possible values: - `1` - The remaining string will be included in the last element of the result array. This is the behavior of Spark's [`split()`](https://spark.apache.org/docs/3.1.2/api/python/reference/api/pyspark.sql.functions.split.html) function and Python's ['string.split()'](https://docs.python.org/3/library/stdtypes.html#str.split) method. 
)", 0) \ \ - M(Bool, allow_execute_multiif_columnar, true, R"( + DECLARE(Bool, allow_execute_multiif_columnar, true, R"( Allow execute multiIf function columnar )", 0) \ - M(Bool, formatdatetime_f_prints_single_zero, false, R"( + DECLARE(Bool, formatdatetime_f_prints_single_zero, false, R"( Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds. )", 0) \ - M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, R"( + DECLARE(Bool, formatdatetime_parsedatetime_m_is_month_name, true, R"( Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes. )", 0) \ - M(Bool, parsedatetime_parse_without_leading_zeros, true, R"( + DECLARE(Bool, parsedatetime_parse_without_leading_zeros, true, R"( Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros. )", 0) \ - M(Bool, formatdatetime_format_without_leading_zeros, false, R"( + DECLARE(Bool, formatdatetime_format_without_leading_zeros, false, R"( Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros. )", 0) \ \ - M(UInt64, max_partitions_per_insert_block, 100, R"( + DECLARE(UInt64, max_partitions_per_insert_block, 100, R"( Limit maximum number of partitions in the single INSERTed block. Zero means unlimited. Throw an exception if the block contains too many partitions. This setting is a safety threshold because using a large number of partitions is a common misconception. )", 0) \ - M(Bool, throw_on_max_partitions_per_insert_block, true, R"( + DECLARE(Bool, throw_on_max_partitions_per_insert_block, true, R"( Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. 
This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block. )", 0) \ - M(Int64, max_partitions_to_read, -1, R"( + DECLARE(Int64, max_partitions_to_read, -1, R"( Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. )", 0) \ - M(Bool, check_query_single_value_result, true, R"( + DECLARE(Bool, check_query_single_value_result, true, R"( Defines the level of detail for the [CHECK TABLE](../../sql-reference/statements/check-table.md/#checking-mergetree-tables) query result for `MergeTree` family engines . Possible values: @@ -3068,12 +3055,12 @@ Possible values: - 0 — the query shows a check status for every individual data part of a table. - 1 — the query shows the general table check status. )", 0) \ - M(Bool, allow_drop_detached, false, R"( + DECLARE(Bool, allow_drop_detached, false, R"( Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries )", 0) \ - M(UInt64, max_parts_to_move, 1000, "Limit the number of parts that can be moved in one query. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_parts_to_move, 1000, "Limit the number of parts that can be moved in one query. Zero means unlimited.", 0) \ \ - M(UInt64, max_table_size_to_drop, 50000000000lu, R"( + DECLARE(UInt64, max_table_size_to_drop, 50000000000lu, R"( Restriction on deleting tables in query time. The value 0 means that you can delete all tables without any restrictions. Cloud default value: 1 TB. @@ -3082,7 +3069,7 @@ Cloud default value: 1 TB. This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop) ::: )", 0) \ - M(UInt64, max_partition_size_to_drop, 50000000000lu, R"( + DECLARE(UInt64, max_partition_size_to_drop, 50000000000lu, R"( Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions. Cloud default value: 1 TB. 
@@ -3092,33 +3079,33 @@ This query setting overwrites its server setting equivalent, see [max_partition_ ::: )", 0) \ \ - M(UInt64, postgresql_connection_pool_size, 16, R"( + DECLARE(UInt64, postgresql_connection_pool_size, 16, R"( Connection pool size for PostgreSQL table engine and database engine. )", 0) \ - M(UInt64, postgresql_connection_attempt_timeout, 2, R"( + DECLARE(UInt64, postgresql_connection_attempt_timeout, 2, R"( Connection timeout in seconds of a single attempt to connect PostgreSQL end-point. The value is passed as a `connect_timeout` parameter of the connection URL. )", 0) \ - M(UInt64, postgresql_connection_pool_wait_timeout, 5000, R"( + DECLARE(UInt64, postgresql_connection_pool_wait_timeout, 5000, R"( Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool. )", 0) \ - M(UInt64, postgresql_connection_pool_retries, 2, R"( + DECLARE(UInt64, postgresql_connection_pool_retries, 2, R"( Connection pool push/pop retries number for PostgreSQL table engine and database engine. )", 0) \ - M(Bool, postgresql_connection_pool_auto_close_connection, false, R"( + DECLARE(Bool, postgresql_connection_pool_auto_close_connection, false, R"( Close connection before returning connection to the pool. )", 0) \ - M(UInt64, glob_expansion_max_elements, 1000, R"( + DECLARE(UInt64, glob_expansion_max_elements, 1000, R"( Maximum number of allowed addresses (For external storages, table functions, etc). )", 0) \ - M(UInt64, odbc_bridge_connection_pool_size, 16, R"( + DECLARE(UInt64, odbc_bridge_connection_pool_size, 16, R"( Connection pool size for each connection settings string in ODBC bridge. )", 0) \ - M(Bool, odbc_bridge_use_connection_pooling, true, R"( + DECLARE(Bool, odbc_bridge_use_connection_pooling, true, R"( Use connection pooling in ODBC bridge. If set to false, a new connection is created every time. 
)", 0) \ \ - M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, R"( + DECLARE(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, R"( - Type: seconds - Default value: 60 seconds @@ -3131,7 +3118,7 @@ See also: - [distributed_replica_error_cap](#distributed_replica_error_cap) - [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors) )", 0) \ - M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, R"( + DECLARE(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, R"( - Type: unsigned int - Default value: 1000 @@ -3144,7 +3131,7 @@ See also: - [distributed_replica_error_half_life](#distributed_replica_error_half_life) - [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors) )", 0) \ - M(UInt64, distributed_replica_max_ignored_errors, 0, R"( + DECLARE(UInt64, distributed_replica_max_ignored_errors, 0, R"( - Type: unsigned int - Default value: 0 @@ -3158,11 +3145,11 @@ See also: - [distributed_replica_error_half_life](#distributed_replica_error_half_life) )", 0) \ \ - M(UInt64, min_free_disk_space_for_temporary_data, 0, R"( + DECLARE(UInt64, min_free_disk_space_for_temporary_data, 0, R"( The minimum disk space to keep while writing temporary data used in external sorting and aggregation. )", 0) \ \ - M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, R"( + DECLARE(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, R"( Same as [default_table_engine](#default_table_engine) but for temporary tables. 
In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine: @@ -3193,7 +3180,7 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` )", 0) \ - M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, R"( + DECLARE(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, R"( Default table engine to use when `ENGINE` is not set in a `CREATE` statement. Possible values: @@ -3246,7 +3233,7 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` )", 0) \ - M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, R"( + DECLARE(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, R"( Sets the `SHOW TABLE` query display. Possible values: @@ -3254,7 +3241,7 @@ Possible values: - 0 — The query will be displayed without table UUID. - 1 — The query will be displayed with table UUID. )", 0) \ - M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, R"( + DECLARE(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, R"( Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. Possible values: @@ -3262,10 +3249,10 @@ Possible values: - 0 — Queries will be executed with delay. - 1 — Queries will be executed without delay. )", 0) \ - M(Bool, enable_scalar_subquery_optimization, true, R"( + DECLARE(Bool, enable_scalar_subquery_optimization, true, R"( If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once. )", 0) \ - M(Bool, optimize_trivial_count_query, true, R"( + DECLARE(Bool, optimize_trivial_count_query, true, R"( Enables or disables the optimization to trivial query `SELECT count() FROM table` using metadata from MergeTree. If you need to use row-level security, disable this setting. 
Possible values: @@ -3277,7 +3264,7 @@ See also: - [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) )", 0) \ - M(Bool, optimize_trivial_approximate_count_query, false, R"( + DECLARE(Bool, optimize_trivial_approximate_count_query, false, R"( Use an approximate value for trivial count optimization of storages that support such estimation, for example, EmbeddedRocksDB. Possible values: @@ -3285,7 +3272,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. )", 0) \ - M(Bool, optimize_count_from_files, true, R"( + DECLARE(Bool, optimize_count_from_files, true, R"( Enables or disables the optimization of counting number of rows from files in different input formats. It applies to table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. Possible values: @@ -3293,15 +3280,15 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. )", 0) \ - M(Bool, use_cache_for_count_from_files, true, R"( + DECLARE(Bool, use_cache_for_count_from_files, true, R"( Enables caching of rows number during count from files in table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. Enabled by default. )", 0) \ - M(Bool, optimize_respect_aliases, true, R"( + DECLARE(Bool, optimize_respect_aliases, true, R"( If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count )", 0) \ - M(UInt64, mutations_sync, 0, R"( + DECLARE(UInt64, mutations_sync, 0, R"( Allows to execute `ALTER TABLE ... UPDATE|DELETE|MATERIALIZE INDEX|MATERIALIZE PROJECTION|MATERIALIZE COLUMN` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously. Possible values: @@ -3310,10 +3297,10 @@ Possible values: - 1 - The query waits for all mutations to complete on the current server. - 2 - The query waits for all mutations to complete on all replicas (if they exist). 
)", 0) \ - M(Bool, enable_lightweight_delete, true, R"( + DECLARE(Bool, enable_lightweight_delete, true, R"( Enable lightweight DELETE mutations for mergetree tables. )", 0) ALIAS(allow_experimental_lightweight_delete) \ - M(UInt64, lightweight_deletes_sync, 2, R"( + DECLARE(UInt64, lightweight_deletes_sync, 2, R"( The same as [`mutations_sync`](#mutations_sync), but controls only execution of lightweight deletes. Possible values: @@ -3327,16 +3314,16 @@ Possible values: - [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [Mutations](../../sql-reference/statements/alter/index.md#mutations) )", 0) \ - M(Bool, apply_deleted_mask, true, R"( + DECLARE(Bool, apply_deleted_mask, true, R"( Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios )", 0) \ - M(Bool, optimize_normalize_count_variants, true, R"( + DECLARE(Bool, optimize_normalize_count_variants, true, R"( Rewrite aggregate functions that semantically equals to count() as count(). )", 0) \ - M(Bool, optimize_injective_functions_inside_uniq, true, R"( + DECLARE(Bool, optimize_injective_functions_inside_uniq, true, R"( Delete injective functions of one argument inside uniq*() functions. )", 0) \ - M(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, R"( + DECLARE(Bool, rewrite_count_distinct_if_with_count_distinct_implementation, false, R"( Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#count_distinct_implementation) setting. Possible values: @@ -3344,7 +3331,7 @@ Possible values: - true — Allow. - false — Disallow. )", 0) \ - M(Bool, convert_query_to_cnf, false, R"( + DECLARE(Bool, convert_query_to_cnf, false, R"( When set to `true`, a `SELECT` query will be converted to conjuctive normal form (CNF). 
There are scenarios where rewriting a query in CNF may execute faster (view this [Github issue](https://github.com/ClickHouse/ClickHouse/issues/11749) for an explanation). For example, notice how the following `SELECT` query is not modified (the default behavior): @@ -3409,25 +3396,25 @@ Notice the `WHERE` clause is rewritten in CNF, but the result set is the identic Possible values: true, false )", 0) \ - M(Bool, optimize_or_like_chain, false, R"( + DECLARE(Bool, optimize_or_like_chain, false, R"( Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases. )", 0) \ - M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, R"( + DECLARE(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, R"( Move arithmetic operations out of aggregation functions )", 0) \ - M(Bool, optimize_redundant_functions_in_order_by, true, R"( + DECLARE(Bool, optimize_redundant_functions_in_order_by, true, R"( Remove functions from ORDER BY if its argument is also in ORDER BY )", 0) \ - M(Bool, optimize_if_chain_to_multiif, false, R"( + DECLARE(Bool, optimize_if_chain_to_multiif, false, R"( Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types. )", 0) \ - M(Bool, optimize_multiif_to_if, true, R"( + DECLARE(Bool, optimize_multiif_to_if, true, R"( Replace 'multiIf' with only one condition to 'if'. )", 0) \ - M(Bool, optimize_if_transform_strings_to_enum, false, R"( + DECLARE(Bool, optimize_if_transform_strings_to_enum, false, R"( Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail. )", 0) \ - M(Bool, optimize_functions_to_subcolumns, true, R"( + DECLARE(Bool, optimize_functions_to_subcolumns, true, R"( Enables or disables optimization by transforming some functions to reading subcolumns. 
This reduces the amount of data to read. These functions can be transformed: @@ -3446,37 +3433,37 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. )", 0) \ - M(Bool, optimize_using_constraints, false, R"( + DECLARE(Bool, optimize_using_constraints, false, R"( Use [constraints](../../sql-reference/statements/create/table.md#constraints) for query optimization. The default is `false`. Possible values: - true, false )", 0) \ - M(Bool, optimize_substitute_columns, false, R"( + DECLARE(Bool, optimize_substitute_columns, false, R"( Use [constraints](../../sql-reference/statements/create/table.md#constraints) for column substitution. The default is `false`. Possible values: - true, false )", 0) \ - M(Bool, optimize_append_index, false, R"( + DECLARE(Bool, optimize_append_index, false, R"( Use [constraints](../../sql-reference/statements/create/table.md#constraints) in order to append index condition. The default is `false`. Possible values: - true, false )", 0) \ - M(Bool, optimize_time_filter_with_preimage, true, R"( + DECLARE(Bool, optimize_time_filter_with_preimage, true, R"( Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31') )", 0) \ - M(Bool, normalize_function_names, true, R"( + DECLARE(Bool, normalize_function_names, true, R"( Normalize function names to their canonical names )", 0) \ - M(Bool, enable_early_constant_folding, true, R"( + DECLARE(Bool, enable_early_constant_folding, true, R"( Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there )", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, R"( + DECLARE(Bool, deduplicate_blocks_in_dependent_materialized_views, false, R"( Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables. 
Possible values: @@ -3491,19 +3478,19 @@ If an INSERTed block is skipped due to deduplication in the source table, there At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with ClickHouse Keeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself, ignoring check result for the source table, and will insert rows lost because of the first failure. )", 0) \ - M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, R"( + DECLARE(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, R"( Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together. )", 0) \ - M(Bool, materialized_views_ignore_errors, false, R"( + DECLARE(Bool, materialized_views_ignore_errors, false, R"( Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs )", 0) \ - M(Bool, ignore_materialized_views_with_dropped_target_table, false, R"( + DECLARE(Bool, ignore_materialized_views_with_dropped_target_table, false, R"( Ignore MVs with dropped target table during pushing to views )", 0) \ - M(Bool, allow_materialized_view_with_bad_select, true, R"( + DECLARE(Bool, allow_materialized_view_with_bad_select, true, R"( Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. 
It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table. )", 0) \ - M(Bool, use_compact_format_in_distributed_parts_names, true, R"( + DECLARE(Bool, use_compact_format_in_distributed_parts_names, true, R"( Uses compact format for storing blocks for background (`distributed_foreground_insert`) INSERT into tables with `Distributed` engine. Possible values: @@ -3516,7 +3503,7 @@ Possible values: - with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware. ::: )", 0) \ - M(Bool, validate_polygons, true, R"( + DECLARE(Bool, validate_polygons, true, R"( Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. Possible values: @@ -3524,7 +3511,7 @@ Possible values: - 0 — Throwing an exception is disabled. `pointInPolygon` accepts invalid polygons and returns possibly incorrect results for them. - 1 — Throwing an exception is enabled. )", 0) \ - M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, R"( + DECLARE(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, R"( Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size. Possible values: @@ -3532,13 +3519,13 @@ Possible values: - Positive integer. - 0 — Recursion depth is unlimited. )", 0) \ - M(UInt64, max_parser_backtracks, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, R"( + DECLARE(UInt64, max_parser_backtracks, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, R"( Maximum parser backtracking (how many times it tries different alternatives in the recursive descend parsing process). 
)", 0) \ - M(UInt64, max_recursive_cte_evaluation_depth, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, R"( + DECLARE(UInt64, max_recursive_cte_evaluation_depth, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, R"( Maximum limit on recursive CTE evaluation depth )", 0) \ - M(Bool, allow_settings_after_format_in_insert, false, R"( + DECLARE(Bool, allow_settings_after_format_in_insert, false, R"( Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values. Example: @@ -3563,10 +3550,10 @@ Possible values: Use this setting only for backward compatibility if your use cases depend on old syntax. ::: )", 0) \ - M(Seconds, periodic_live_view_refresh, 60, R"( + DECLARE(Seconds, periodic_live_view_refresh, 60, R"( Interval after which periodically refreshed live view is forced to refresh. )", 0) \ - M(Bool, transform_null_in, false, R"( + DECLARE(Bool, transform_null_in, false, R"( Enables equality of [NULL](../../sql-reference/syntax.md/#null-literal) values for [IN](../../sql-reference/operators/in.md) operator. By default, `NULL` values can’t be compared because `NULL` means undefined value. Thus, comparison `expr = NULL` must always return `false`. With this setting `NULL = NULL` returns `true` for `IN` operator. @@ -3621,7 +3608,7 @@ Result: - [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing) )", 0) \ - M(Bool, allow_nondeterministic_mutations, false, R"( + DECLARE(Bool, allow_nondeterministic_mutations, false, R"( User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`. Given that, for example, dictionaries, can be out of sync across nodes, mutations that pull values from them are disallowed on replicated tables by default. Enabling this setting allows this behavior, making it the user's responsibility to ensure that the data used is in sync across all nodes. 
@@ -3641,7 +3628,7 @@ Given that, for example, dictionaries, can be out of sync across nodes, mutation ``` )", 0) \ - M(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, R"( + DECLARE(Seconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, R"( Defines how many seconds a locking request waits before failing. Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`. @@ -3651,13 +3638,13 @@ Possible values: - Positive integer (in seconds). - 0 — No locking timeout. )", 0) \ - M(Bool, materialize_ttl_after_modify, true, R"( + DECLARE(Bool, materialize_ttl_after_modify, true, R"( Apply TTL for old data, after ALTER MODIFY TTL query )", 0) \ - M(String, function_implementation, "", R"( + DECLARE(String, function_implementation, "", R"( Choose function implementation for specific target or variant (experimental). If empty enable all of them. )", 0) \ - M(Bool, data_type_default_nullable, false, R"( + DECLARE(Bool, data_type_default_nullable, false, R"( Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). Possible values: @@ -3665,7 +3652,7 @@ Possible values: - 1 — The data types in column definitions are set to `Nullable` by default. - 0 — The data types in column definitions are set to not `Nullable` by default. )", 0) \ - M(Bool, cast_keep_nullable, false, R"( + DECLARE(Bool, cast_keep_nullable, false, R"( Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md/#castx-t) operations. 
When the setting is enabled and the argument of `CAST` function is `Nullable`, the result is also transformed to `Nullable` type. When the setting is disabled, the result always has the destination type exactly. @@ -3711,10 +3698,10 @@ Result: - [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function )", 0) \ - M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, R"( + DECLARE(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, R"( CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error. )", 0) \ - M(Bool, alter_partition_verbose_result, false, R"( + DECLARE(Bool, alter_partition_verbose_result, false, R"( Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied. Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md/#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition). 
@@ -3726,7 +3713,7 @@ Possible values: **Example** ```sql -CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a; +CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a; INSERT INTO test VALUES(1, '2021-01-01', ''); INSERT INTO test VALUES(1, '2021-01-01', ''); ALTER TABLE test DETACH PARTITION ID '202101'; @@ -3746,7 +3733,7 @@ ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1; └──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘ ``` )", 0) \ - M(Bool, system_events_show_zero_values, false, R"( + DECLARE(Bool, system_events_show_zero_values, false, R"( Allows to select zero-valued events from [`system.events`](../../operations/system-tables/events.md). Some monitoring systems require passing all the metrics values to them for each checkpoint, even if the metric value is zero. @@ -3784,23 +3771,23 @@ Result └──────────────────────────┴───────┴───────────────────────────────────────────────────────┘ ``` )", 0) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, R"( + DECLARE(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, R"( Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of `decimal`, `datetime64`, `date2Date32` or `date2String`. - `decimal`: convert `NUMERIC` and `DECIMAL` types to `Decimal` when precision allows it. - `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`. - `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`. - `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`.
)", 0) \ - M(Bool, optimize_trivial_insert_select, false, R"( + DECLARE(Bool, optimize_trivial_insert_select, false, R"( Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query )", 0) \ - M(Bool, allow_non_metadata_alters, true, R"( + DECLARE(Bool, allow_non_metadata_alters, true, R"( Allow to execute alters which affects not only tables metadata, but also data on disk )", 0) \ - M(Bool, enable_global_with_statement, true, R"( + DECLARE(Bool, enable_global_with_statement, true, R"( Propagate WITH statements to UNION queries and all subqueries )", 0) \ - M(Bool, aggregate_functions_null_for_empty, false, R"( + DECLARE(Bool, aggregate_functions_null_for_empty, false, R"( Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility. It is implemented via query rewrite (similar to [count_distinct_implementation](#count_distinct_implementation) setting) to get consistent results for distributed queries. @@ -3830,7 +3817,7 @@ With `aggregate_functions_null_for_empty = 1` the result would be: └───────────────┴──────────────┘ ``` )", 0) \ - M(Bool, optimize_syntax_fuse_functions, false, R"( + DECLARE(Bool, optimize_syntax_fuse_functions, false, R"( Enables to fuse aggregate functions with identical argument. It rewrites query contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md/#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md/#agg_function-avg) with identical argument to [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md/#agg_function-sumCount). 
Possible values: @@ -3859,7 +3846,7 @@ SELECT FROM fuse_tbl ``` )", 0) \ - M(Bool, flatten_nested, true, R"( + DECLARE(Bool, flatten_nested, true, R"( Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns. Possible values: @@ -3921,7 +3908,7 @@ SETTINGS index_granularity = 8192 │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` )", 0) \ - M(Bool, asterisk_include_materialized_columns, false, R"( + DECLARE(Bool, asterisk_include_materialized_columns, false, R"( Include [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) columns for wildcard query (`SELECT *`). Possible values: @@ -3929,7 +3916,7 @@ Possible values: - 0 - disabled - 1 - enabled )", 0) \ - M(Bool, asterisk_include_alias_columns, false, R"( + DECLARE(Bool, asterisk_include_alias_columns, false, R"( Include [ALIAS](../../sql-reference/statements/create/table.md#alias) columns for wildcard query (`SELECT *`). Possible values: @@ -3937,7 +3924,7 @@ Possible values: - 0 - disabled - 1 - enabled )", 0) \ - M(Bool, optimize_skip_merged_partitions, false, R"( + DECLARE(Bool, optimize_skip_merged_partitions, false, R"( Enables or disables optimization for [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md) query if there is only one part with level > 0 and it doesn't have expired TTL. - `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1` @@ -3949,7 +3936,7 @@ Possible values: - 1 - Enable optimization. - 0 - Disable optimization. )", 0) \ - M(Bool, optimize_on_insert, true, R"( + DECLARE(Bool, optimize_on_insert, true, R"( Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine). 
Possible values: @@ -4000,7 +3987,7 @@ Result: Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md/#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour. )", 0) \ - M(Bool, optimize_use_projections, true, R"( + DECLARE(Bool, optimize_use_projections, true, R"( Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries. Possible values: @@ -4008,10 +3995,10 @@ Possible values: - 0 — Projection optimization disabled. - 1 — Projection optimization enabled. )", 0) ALIAS(allow_experimental_projection_optimization) \ - M(Bool, optimize_use_implicit_projections, true, R"( + DECLARE(Bool, optimize_use_implicit_projections, true, R"( Automatically choose implicit projections to perform SELECT query )", 0) \ - M(Bool, force_optimize_projection, false, R"( + DECLARE(Bool, force_optimize_projection, false, R"( Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting). Possible values: @@ -4019,14 +4006,14 @@ Possible values: - 0 — Projection optimization is not obligatory. - 1 — Projection optimization is obligatory. )", 0) \ - M(String, force_optimize_projection_name, "", R"( + DECLARE(String, force_optimize_projection_name, "", R"( If it is set to a non-empty string, check that this projection is used in the query at least once. Possible values: - string: name of projection that used in a query )", 0) \ - M(String, preferred_optimize_projection_name, "", R"( + DECLARE(String, preferred_optimize_projection_name, "", R"( If it is set to a non-empty string, ClickHouse will try to apply specified projection in query. 
@@ -4034,17 +4021,17 @@ Possible values: - string: name of preferred projection )", 0) \ - M(Bool, async_socket_for_remote, true, R"( + DECLARE(Bool, async_socket_for_remote, true, R"( Enables asynchronous read from socket while executing remote query. Enabled by default. )", 0) \ - M(Bool, async_query_sending_for_remote, true, R"( + DECLARE(Bool, async_query_sending_for_remote, true, R"( Enables asynchronous connection creation and query sending while executing remote query. Enabled by default. )", 0) \ - M(Bool, insert_null_as_default, true, R"( + DECLARE(Bool, insert_null_as_default, true, R"( Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type. If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. @@ -4055,10 +4042,10 @@ Possible values: - 0 — Inserting `NULL` into a not nullable column causes an exception. - 1 — Default column value is inserted instead of `NULL`. )", 0) \ - M(Bool, describe_extend_object_types, false, R"( + DECLARE(Bool, describe_extend_object_types, false, R"( Deduce concrete type of columns of type Object in DESCRIBE query )", 0) \ - M(Bool, describe_include_subcolumns, false, R"( + DECLARE(Bool, describe_include_subcolumns, false, R"( Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type. 
Possible values: @@ -4070,30 +4057,30 @@ Possible values: See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement. )", 0) \ - M(Bool, describe_include_virtual_columns, false, R"( + DECLARE(Bool, describe_include_virtual_columns, false, R"( If true, virtual columns of table will be included into result of DESCRIBE query )", 0) \ - M(Bool, describe_compact_output, false, R"( + DECLARE(Bool, describe_compact_output, false, R"( If true, include only column names and types into result of DESCRIBE query )", 0) \ - M(Bool, apply_mutations_on_fly, false, R"( + DECLARE(Bool, apply_mutations_on_fly, false, R"( If true, mutations (UPDATEs and DELETEs) which are not materialized in data part will be applied on SELECTs. Only available in ClickHouse Cloud. )", 0) \ - M(Bool, mutations_execute_nondeterministic_on_initiator, false, R"( + DECLARE(Bool, mutations_execute_nondeterministic_on_initiator, false, R"( If true constant nondeterministic functions (e.g. function `now()`) are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`. )", 0) \ - M(Bool, mutations_execute_subqueries_on_initiator, false, R"( + DECLARE(Bool, mutations_execute_subqueries_on_initiator, false, R"( If true scalar subqueries are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. Default value: `false`. )", 0) \ - M(UInt64, mutations_max_literal_size_to_replace, 16384, R"( + DECLARE(UInt64, mutations_max_literal_size_to_replace, 16384, R"( The maximum size of serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one the two settings above is enabled. Default value: 16384 (16 KiB). 
)", 0) \ \ - M(Float, create_replicated_merge_tree_fault_injection_probability, 0.0f, R"( + DECLARE(Float, create_replicated_merge_tree_fault_injection_probability, 0.0f, R"( The probability of a fault injection during table creation after creating metadata in ZooKeeper )", 0) \ \ - M(Bool, use_query_cache, false, R"( + DECLARE(Bool, use_query_cache, false, R"( If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache) and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used. @@ -4102,7 +4089,7 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(Bool, enable_writes_to_query_cache, true, R"( + DECLARE(Bool, enable_writes_to_query_cache, true, R"( If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md). Possible values: @@ -4110,7 +4097,7 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(Bool, enable_reads_from_query_cache, true, R"( + DECLARE(Bool, enable_reads_from_query_cache, true, R"( If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md). Possible values: @@ -4118,7 +4105,7 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, R"( + DECLARE(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, R"( Controls how the [query cache](../query-cache.md) handles `SELECT` queries with non-deterministic functions like `rand()` or `now()`. Possible values: @@ -4127,7 +4114,7 @@ Possible values: - `'save'` - Cache the query result. - `'ignore'` - Don't cache the query result and don't throw an exception. 
)", 0) \ - M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, R"( + DECLARE(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, R"( Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`. Possible values: @@ -4136,35 +4123,35 @@ Possible values: - `'save'` - Cache the query result. - `'ignore'` - Don't cache the query result and don't throw an exception. )", 0) \ - M(UInt64, query_cache_max_size_in_bytes, 0, R"( + DECLARE(UInt64, query_cache_max_size_in_bytes, 0, R"( The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited. Possible values: - Positive integer >= 0. )", 0) \ - M(UInt64, query_cache_max_entries, 0, R"( + DECLARE(UInt64, query_cache_max_entries, 0, R"( The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited. Possible values: - Positive integer >= 0. )", 0) \ - M(UInt64, query_cache_min_query_runs, 0, R"( + DECLARE(UInt64, query_cache_min_query_runs, 0, R"( Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md). Possible values: - Positive integer >= 0. )", 0) \ - M(Milliseconds, query_cache_min_query_duration, 0, R"( + DECLARE(Milliseconds, query_cache_min_query_duration, 0, R"( Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md). Possible values: - Positive integer >= 0. )", 0) \ - M(Bool, query_cache_compress_entries, true, R"( + DECLARE(Bool, query_cache_compress_entries, true, R"( Compress entries in the [query cache](../query-cache.md). Lessens the memory consumption of the query cache at the cost of slower inserts into / reads from it. 
Possible values: @@ -4172,7 +4159,7 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(Bool, query_cache_squash_partial_results, true, R"( + DECLARE(Bool, query_cache_squash_partial_results, true, R"( Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressability of cache entries (see [query_cache_compress-entries](#query-cache-compress-entries)). Possible values: @@ -4180,14 +4167,14 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(Seconds, query_cache_ttl, 60, R"( + DECLARE(Seconds, query_cache_ttl, 60, R"( After this time in seconds entries in the [query cache](../query-cache.md) become stale. Possible values: - Positive integer >= 0. )", 0) \ - M(Bool, query_cache_share_between_users, false, R"( + DECLARE(Bool, query_cache_share_between_users, false, R"( If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users. It is not recommended to enable this setting due to security reasons. @@ -4196,7 +4183,7 @@ Possible values: - 0 - Disabled - 1 - Enabled )", 0) \ - M(String, query_cache_tag, "", R"( + DECLARE(String, query_cache_tag, "", R"( A string which acts as a label for [query cache](../query-cache.md) entries. The same queries with different tags are considered different by the query cache. @@ -4204,14 +4191,14 @@ Possible values: - Any string )", 0) \ - M(Bool, enable_sharing_sets_for_mutations, true, R"( + DECLARE(Bool, enable_sharing_sets_for_mutations, true, R"( Allow sharing set objects build for IN subqueries between different tasks of the same mutation. 
This reduces memory usage and CPU consumption )", 0) \ \ - M(Bool, optimize_rewrite_sum_if_to_count_if, true, R"( + DECLARE(Bool, optimize_rewrite_sum_if_to_count_if, true, R"( Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent )", 0) \ - M(Bool, optimize_rewrite_aggregate_function_with_if, true, R"( + DECLARE(Bool, optimize_rewrite_aggregate_function_with_if, true, R"( Rewrite aggregate functions with if expression as argument when logically equivalent. For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance. @@ -4219,10 +4206,10 @@ For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, c Supported only with experimental analyzer (`enable_analyzer = 1`). ::: )", 0) \ - M(Bool, optimize_rewrite_array_exists_to_has, false, R"( + DECLARE(Bool, optimize_rewrite_array_exists_to_has, false, R"( Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1) )", 0) \ - M(UInt64, insert_shard_id, 0, R"( + DECLARE(UInt64, insert_shard_id, 0, R"( If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table into which the data will be inserted synchronously. If `insert_shard_id` value is incorrect, the server will throw an exception. 
@@ -4267,57 +4254,57 @@ Result: ``` )", 0) \ \ - M(Bool, collect_hash_table_stats_during_aggregation, true, R"( + DECLARE(Bool, collect_hash_table_stats_during_aggregation, true, R"( Enable collecting hash table statistics to optimize memory allocation )", 0) \ - M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, R"( + DECLARE(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, R"( For how many elements it is allowed to preallocate space in all hash tables in total before aggregation )", 0) \ \ - M(Bool, collect_hash_table_stats_during_joins, true, R"( + DECLARE(Bool, collect_hash_table_stats_during_joins, true, R"( Enable collecting hash table statistics to optimize memory allocation )", 0) \ - M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, R"( + DECLARE(UInt64, max_size_to_preallocate_for_joins, 100'000'000, R"( For how many elements it is allowed to preallocate space in all hash tables in total before join )", 0) \ \ - M(Bool, kafka_disable_num_consumers_limit, false, R"( + DECLARE(Bool, kafka_disable_num_consumers_limit, false, R"( Disable limit on kafka_num_consumers that depends on the number of available CPU cores. )", 0) \ - M(Bool, allow_experimental_kafka_offsets_storage_in_keeper, false, R"( + DECLARE(Bool, allow_experimental_kafka_offsets_storage_in_keeper, false, R"( Allow experimental feature to store Kafka related offsets in ClickHouse Keeper. When enabled a ClickHouse Keeper path and replica name can be specified to the Kafka table engine. 
As a result instead of the regular Kafka engine, a new type of storage engine will be used that stores the committed offsets primarily in ClickHouse Keeper )", 0) \ - M(Bool, enable_software_prefetch_in_aggregation, true, R"( + DECLARE(Bool, enable_software_prefetch_in_aggregation, true, R"( Enable use of software prefetch in aggregation )", 0) \ - M(Bool, allow_aggregate_partitions_independently, false, R"( + DECLARE(Bool, allow_aggregate_partitions_independently, false, R"( Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size )", 0) \ - M(Bool, force_aggregate_partitions_independently, false, R"( + DECLARE(Bool, force_aggregate_partitions_independently, false, R"( Force the use of optimization when it is applicable, but heuristics decided not to use it )", 0) \ - M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, R"( + DECLARE(UInt64, max_number_of_partitions_for_independent_aggregation, 128, R"( Maximal number of partitions in table to apply optimization )", 0) \ - M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, R"( + DECLARE(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, R"( Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled )", 0) \ \ - M(Bool, engine_file_empty_if_not_exists, false, R"( + DECLARE(Bool, engine_file_empty_if_not_exists, false, R"( Allows to select data from a file engine table without file. Possible values: - 0 — `SELECT` throws exception. - 1 — `SELECT` returns empty result. )", 0) \ - M(Bool, engine_file_truncate_on_insert, false, R"( + DECLARE(Bool, engine_file_truncate_on_insert, false, R"( Enables or disables truncate before insert in [File](../../engines/table-engines/special/file.md) engine tables. Possible values: - 0 — `INSERT` query appends new data to the end of the file. 
- 1 — `INSERT` query replaces existing content of the file with the new data. )", 0) \ - M(Bool, engine_file_allow_create_multiple_files, false, R"( + DECLARE(Bool, engine_file_allow_create_multiple_files, false, R"( Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern: `data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. @@ -4326,26 +4313,26 @@ Possible values: - 0 — `INSERT` query appends new data to the end of the file. - 1 — `INSERT` query creates a new file. )", 0) \ - M(Bool, engine_file_skip_empty_files, false, R"( + DECLARE(Bool, engine_file_skip_empty_files, false, R"( Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables. Possible values: - 0 — `SELECT` throws an exception if empty file is not compatible with requested format. - 1 — `SELECT` returns empty result for empty file. )", 0) \ - M(Bool, engine_url_skip_empty_files, false, R"( + DECLARE(Bool, engine_url_skip_empty_files, false, R"( Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. Possible values: - 0 — `SELECT` throws an exception if empty file is not compatible with requested format. - 1 — `SELECT` returns empty result for empty file. )", 0) \ - M(Bool, enable_url_encoding, true, R"( + DECLARE(Bool, enable_url_encoding, true, R"( Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. Enabled by default. )", 0) \ - M(UInt64, database_replicated_initial_query_timeout_sec, 300, R"( + DECLARE(UInt64, database_replicated_initial_query_timeout_sec, 300, R"( Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds. 
Possible values: @@ -4353,10 +4340,10 @@ Possible values: - Positive integer. - 0 — Unlimited. )", 0) \ - M(Bool, database_replicated_enforce_synchronous_settings, false, R"( + DECLARE(Bool, database_replicated_enforce_synchronous_settings, false, R"( Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings. )", 0) \ - M(UInt64, max_distributed_depth, 5, R"( + DECLARE(UInt64, max_distributed_depth, 5, R"( Limits the maximum depth of recursive queries for [Distributed](../../engines/table-engines/special/distributed.md) tables. If the value is exceeded, the server throws an exception. @@ -4366,31 +4353,31 @@ Possible values: - Positive integer. - 0 — Unlimited depth. )", 0) \ - M(Bool, database_replicated_always_detach_permanently, false, R"( + DECLARE(Bool, database_replicated_always_detach_permanently, false, R"( Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated )", 0) \ - M(Bool, database_replicated_allow_only_replicated_engine, false, R"( + DECLARE(Bool, database_replicated_allow_only_replicated_engine, false, R"( Allow to create only Replicated tables in database with engine Replicated )", 0) \ - M(UInt64, database_replicated_allow_replicated_engine_arguments, 0, R"( + DECLARE(UInt64, database_replicated_allow_replicated_engine_arguments, 0, R"( 0 - Don't allow to explicitly specify ZooKeeper path and replica name for *MergeTree tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified path and use default one instead. 3 - Allow and don't log a warning. )", 0) \ - M(UInt64, database_replicated_allow_explicit_uuid, 0, R"( + DECLARE(UInt64, database_replicated_allow_explicit_uuid, 0, R"( 0 - Don't allow to explicitly specify UUIDs for tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified UUID and generate a random one instead. 
)", 0) \ - M(Bool, database_replicated_allow_heavy_create, false, R"( + DECLARE(Bool, database_replicated_allow_heavy_create, false, R"( Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block DDL queue for a long time. )", 0) \ - M(Bool, cloud_mode, false, R"( + DECLARE(Bool, cloud_mode, false, R"( Cloud mode )", 0) \ - M(UInt64, cloud_mode_engine, 1, R"( + DECLARE(UInt64, cloud_mode_engine, 1, R"( The engine family allowed in Cloud. 0 - allow everything, 1 - rewrite DDLs to use *ReplicatedMergeTree, 2 - rewrite DDLs to use SharedMergeTree. UInt64 to minimize public part )", 0) \ - M(UInt64, cloud_mode_database_engine, 1, R"( + DECLARE(UInt64, cloud_mode_database_engine, 1, R"( The database engine allowed in Cloud. 1 - rewrite DDLs to use Replicated database, 2 - rewrite DDLs to use Shared database )", 0) \ - M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, R"( + DECLARE(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, R"( Sets format of distributed DDL query result. Possible values: @@ -4405,24 +4392,24 @@ Possible values: Cloud default value: `none`. )", 0) \ - M(UInt64, distributed_ddl_entry_format_version, 5, R"( + DECLARE(UInt64, distributed_ddl_entry_format_version, 5, R"( Compatibility version of distributed DDL (ON CLUSTER) queries )", 0) \ \ - M(UInt64, external_storage_max_read_rows, 0, R"( + DECLARE(UInt64, external_storage_max_read_rows, 0, R"( Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled )", 0) \ - M(UInt64, external_storage_max_read_bytes, 0, R"( + DECLARE(UInt64, external_storage_max_read_bytes, 0, R"( Limit maximum number of bytes when table with external engine should flush history data. 
Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled )", 0) \ - M(UInt64, external_storage_connect_timeout_sec, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, R"( + DECLARE(UInt64, external_storage_connect_timeout_sec, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, R"( Connect timeout in seconds. Now supported only for MySQL )", 0) \ - M(UInt64, external_storage_rw_timeout_sec, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, R"( + DECLARE(UInt64, external_storage_rw_timeout_sec, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, R"( Read/write timeout in seconds. Now supported only for MySQL )", 0) \ \ - M(SetOperationMode, union_default_mode, SetOperationMode::Unspecified, R"( + DECLARE(SetOperationMode, union_default_mode, SetOperationMode::Unspecified, R"( Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`. Possible values: @@ -4433,33 +4420,33 @@ Possible values: See examples in [UNION](../../sql-reference/statements/select/union.md). )", 0) \ - M(SetOperationMode, intersect_default_mode, SetOperationMode::ALL, R"( + DECLARE(SetOperationMode, intersect_default_mode, SetOperationMode::ALL, R"( Set default mode in INTERSECT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception. )", 0) \ - M(SetOperationMode, except_default_mode, SetOperationMode::ALL, R"( + DECLARE(SetOperationMode, except_default_mode, SetOperationMode::ALL, R"( Set default mode in EXCEPT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception. 
)", 0) \ - M(Bool, optimize_aggregators_of_group_by_keys, true, R"( + DECLARE(Bool, optimize_aggregators_of_group_by_keys, true, R"( Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section )", 0) \ - M(Bool, optimize_injective_functions_in_group_by, true, R"( + DECLARE(Bool, optimize_injective_functions_in_group_by, true, R"( Replaces injective functions by it's arguments in GROUP BY section )", 0) \ - M(Bool, optimize_group_by_function_keys, true, R"( + DECLARE(Bool, optimize_group_by_function_keys, true, R"( Eliminates functions of other keys in GROUP BY section )", 0) \ - M(Bool, optimize_group_by_constant_keys, true, R"( + DECLARE(Bool, optimize_group_by_constant_keys, true, R"( Optimize GROUP BY when all keys in block are constant )", 0) \ - M(Bool, legacy_column_name_of_tuple_literal, false, R"( + DECLARE(Bool, legacy_column_name_of_tuple_literal, false, R"( List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher. )", 0) \ - M(Bool, enable_named_columns_in_function_tuple, false, R"( + DECLARE(Bool, enable_named_columns_in_function_tuple, true, R"( Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers. Beware that this setting might currently result in broken queries. It's not recommended to use in production )", 0) \ \ - M(Bool, query_plan_enable_optimizations, true, R"( + DECLARE(Bool, query_plan_enable_optimizations, true, R"( Toggles query optimization at the query plan level. 
:::note @@ -4471,7 +4458,7 @@ Possible values: - 0 - Disable all optimizations at the query plan level - 1 - Enable optimizations at the query plan level (but individual optimizations may still be disabled via their individual settings) )", 0) \ - M(UInt64, query_plan_max_optimizations_to_apply, 10000, R"( + DECLARE(UInt64, query_plan_max_optimizations_to_apply, 10000, R"( Limits the total number of optimizations applied to query plan, see setting [query_plan_enable_optimizations](#query_plan_enable_optimizations). Useful to avoid long optimization times for complex queries. If the actual number of optimizations exceeds this setting, an exception is thrown. @@ -4480,7 +4467,7 @@ If the actual number of optimizations exceeds this setting, an exception is thro This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed. ::: )", 0) \ - M(Bool, query_plan_lift_up_array_join, true, R"( + DECLARE(Bool, query_plan_lift_up_array_join, true, R"( Toggles a query-plan-level optimization which moves ARRAY JOINs up in the execution plan. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4493,7 +4480,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_push_down_limit, true, R"( + DECLARE(Bool, query_plan_push_down_limit, true, R"( Toggles a query-plan-level optimization which moves LIMITs down in the execution plan. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4506,7 +4493,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_split_filter, true, R"( + DECLARE(Bool, query_plan_split_filter, true, R"( :::note This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed. 
::: @@ -4519,7 +4506,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_merge_expressions, true, R"( + DECLARE(Bool, query_plan_merge_expressions, true, R"( Toggles a query-plan-level optimization which merges consecutive filters. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4532,10 +4519,10 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_merge_filters, false, R"( + DECLARE(Bool, query_plan_merge_filters, false, R"( Allow to merge filters in the query plan )", 0) \ - M(Bool, query_plan_filter_push_down, true, R"( + DECLARE(Bool, query_plan_filter_push_down, true, R"( Toggles a query-plan-level optimization which moves filters down in the execution plan. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4548,13 +4535,13 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_convert_outer_join_to_inner_join, true, R"( + DECLARE(Bool, query_plan_convert_outer_join_to_inner_join, true, R"( Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values )", 0) \ - M(Bool, query_plan_optimize_prewhere, true, R"( + DECLARE(Bool, query_plan_optimize_prewhere, true, R"( Allow to push down filter to PREWHERE expression for supported storages )", 0) \ - M(Bool, query_plan_execute_functions_after_sorting, true, R"( + DECLARE(Bool, query_plan_execute_functions_after_sorting, true, R"( Toggles a query-plan-level optimization which moves expressions after sorting steps. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. 
@@ -4567,7 +4554,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_reuse_storage_ordering_for_window_functions, true, R"( + DECLARE(Bool, query_plan_reuse_storage_ordering_for_window_functions, true, R"( Toggles a query-plan-level optimization which uses storage sorting when sorting for window functions. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4580,7 +4567,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_lift_up_union, true, R"( + DECLARE(Bool, query_plan_lift_up_union, true, R"( Toggles a query-plan-level optimization which moves larger subtrees of the query plan into union to enable further optimizations. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4593,7 +4580,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_read_in_order, true, R"( + DECLARE(Bool, query_plan_read_in_order, true, R"( Toggles the read in-order optimization query-plan-level optimization. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4606,7 +4593,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_aggregation_in_order, true, R"( + DECLARE(Bool, query_plan_aggregation_in_order, true, R"( Toggles the aggregation in-order query-plan-level optimization. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4619,7 +4606,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_remove_redundant_sorting, true, R"( + DECLARE(Bool, query_plan_remove_redundant_sorting, true, R"( Toggles a query-plan-level optimization which removes redundant sorting steps, e.g. in subqueries. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. 
@@ -4632,7 +4619,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_remove_redundant_distinct, true, R"( + DECLARE(Bool, query_plan_remove_redundant_distinct, true, R"( Toggles a query-plan-level optimization which removes redundant DISTINCT steps. Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1. @@ -4645,10 +4632,10 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - M(Bool, query_plan_enable_multithreading_after_window_functions, true, R"( + DECLARE(Bool, query_plan_enable_multithreading_after_window_functions, true, R"( Enable multithreading after evaluating window functions to allow parallel stream processing )", 0) \ - M(UInt64, regexp_max_matches_per_row, 1000, R"( + DECLARE(UInt64, regexp_max_matches_per_row, 1000, R"( Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using greedy regular expression in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md/#extractallgroups-horizontal) function. Possible values: @@ -4656,7 +4643,7 @@ Possible values: - Positive integer. )", 0) \ \ - M(UInt64, limit, 0, R"( + DECLARE(UInt64, limit, 0, R"( Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md/#limit-clause) clause, so that the limit, specified in the query, cannot exceed the limit, set by this setting. Possible values: @@ -4664,7 +4651,7 @@ Possible values: - 0 — The number of rows is not limited. - Positive integer. )", 0) \ - M(UInt64, offset, 0, R"( + DECLARE(UInt64, offset, 0, R"( Sets the number of rows to skip before starting to return rows from the query. It adjusts the offset set by the [OFFSET](../../sql-reference/statements/select/offset.md/#offset-fetch) clause, so that these two values are summarized. 
Possible values: @@ -4699,7 +4686,7 @@ Result: ``` )", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, R"( + DECLARE(UInt64, function_range_max_elements_in_block, 500000000, R"( Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md/#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block). Possible values: @@ -4711,13 +4698,13 @@ Possible values: - [max_block_size](#setting-max_block_size) - [min_insert_block_size_rows](#min-insert-block-size-rows) )", 0) \ - M(UInt64, function_sleep_max_microseconds_per_block, 3000000, R"( + DECLARE(UInt64, function_sleep_max_microseconds_per_block, 3000000, R"( Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold. )", 0) \ - M(UInt64, function_visible_width_behavior, 1, R"( + DECLARE(UInt64, function_visible_width_behavior, 1, R"( The version of `visibleWidth` behavior. 0 - only count the number of code points; 1 - correctly count zero-width and combining characters, count full-width characters as two, estimate the tab width, count delete characters. )", 0) \ - M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, R"( + DECLARE(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, R"( Allows calculating the [if](../../sql-reference/functions/conditional-functions.md/#if), [multiIf](../../sql-reference/functions/conditional-functions.md/#multiif), [and](../../sql-reference/functions/logical-functions.md/#logical-and-function), and [or](../../sql-reference/functions/logical-functions.md/#logical-or-function) functions according to a [short scheme](https://en.wikipedia.org/wiki/Short-circuit_evaluation). 
This helps optimize the execution of complex expressions in these functions and prevent possible exceptions (such as division by zero when it is not expected). Possible values: @@ -4727,234 +4714,237 @@ Possible values: - `disable` — Disables short-circuit function evaluation. )", 0) \ \ - M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, R"( + DECLARE(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, R"( Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). )", 0) \ - M(String, local_filesystem_read_method, "pread_threadpool", R"( + DECLARE(String, local_filesystem_read_method, "pread_threadpool", R"( Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes. )", 0) \ - M(String, remote_filesystem_read_method, "threadpool", R"( + DECLARE(String, remote_filesystem_read_method, "threadpool", R"( Method of reading data from remote filesystem, one of: read, threadpool. )", 0) \ - M(Bool, local_filesystem_read_prefetch, false, R"( + DECLARE(Bool, local_filesystem_read_prefetch, false, R"( Should use prefetching when reading data from local filesystem. )", 0) \ - M(Bool, remote_filesystem_read_prefetch, true, R"( + DECLARE(Bool, remote_filesystem_read_prefetch, true, R"( Should use prefetching when reading data from remote filesystem. )", 0) \ - M(Int64, read_priority, 0, R"( + DECLARE(Int64, read_priority, 0, R"( Priority to read data from local filesystem or remote filesystem. Only supported for 'pread_threadpool' method for local filesystem and for `threadpool` method for remote filesystem. 
)", 0) \ - M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), R"( -The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. + DECLARE(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, 0, R"( +The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. We do not recommend using this setting. Possible values: - Positive integer. )", 0) \ - M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), R"( -The minimum number of bytes to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. + DECLARE(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, 0, R"( +The minimum number of bytes to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. We do not recommend using this setting. Possible values: - Positive integer. )", 0) \ - M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, R"( + DECLARE(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, R"( Min bytes required for remote read (url, s3) to do seek, instead of read with ignore. )", 0) \ - M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 2 * DBMS_DEFAULT_BUFFER_SIZE, R"( + DECLARE(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 2 * DBMS_DEFAULT_BUFFER_SIZE, R"( Min bytes to read per task. 
)", 0) ALIAS(filesystem_prefetch_min_bytes_for_single_read_task) \ - M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, R"( + DECLARE(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, R"( Whether to use constant size tasks for reading from a remote table. )", 0) \ - M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, R"( + DECLARE(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, R"( Whether to use only prewhere columns size to determine reading task size. )", 0) \ - M(UInt64, merge_tree_compact_parts_min_granules_to_multibuffer_read, 16, R"( + DECLARE(UInt64, merge_tree_min_read_task_size, 8, R"( +Hard lower limit on the task size (even when the number of granules is low and the number of available threads is high we won't allocate smaller tasks +)", 0) \ + DECLARE(UInt64, merge_tree_compact_parts_min_granules_to_multibuffer_read, 16, R"( Only available in ClickHouse Cloud. Number of granules in stripe of compact part of MergeTree tables to use multibuffer reader, which supports parallel reading and prefetch. In case of reading from remote fs using of multibuffer reader increases number of read request. )", 0) \ \ - M(Bool, async_insert, false, R"( + DECLARE(Bool, async_insert, false, R"( If true, data from INSERT query is stored in queue and later flushed to table in background. 
If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table )", 0) \ - M(Bool, wait_for_async_insert, true, R"( + DECLARE(Bool, wait_for_async_insert, true, R"( If true wait for processing of asynchronous insertion )", 0) \ - M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, R"( + DECLARE(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, R"( Timeout for waiting for processing asynchronous insertion )", 0) \ - M(UInt64, async_insert_max_data_size, 10485760, R"( + DECLARE(UInt64, async_insert_max_data_size, 10485760, R"( Maximum size in bytes of unparsed data collected per query before being inserted )", 0) \ - M(UInt64, async_insert_max_query_number, 450, R"( + DECLARE(UInt64, async_insert_max_query_number, 450, R"( Maximum number of insert queries before being inserted )", 0) \ - M(Milliseconds, async_insert_poll_timeout_ms, 10, R"( + DECLARE(Milliseconds, async_insert_poll_timeout_ms, 10, R"( Timeout for polling data from asynchronous insert queue )", 0) \ - M(Bool, async_insert_use_adaptive_busy_timeout, true, R"( + DECLARE(Bool, async_insert_use_adaptive_busy_timeout, true, R"( If it is set to true, use adaptive busy timeout for asynchronous inserts )", 0) \ - M(Milliseconds, async_insert_busy_timeout_min_ms, 50, R"( + DECLARE(Milliseconds, async_insert_busy_timeout_min_ms, 50, R"( If auto-adjusting is enabled through async_insert_use_adaptive_busy_timeout, minimum time to wait before dumping collected data per query since the first data appeared. It also serves as the initial value for the adaptive algorithm )", 0) \ - M(Milliseconds, async_insert_busy_timeout_max_ms, 200, R"( + DECLARE(Milliseconds, async_insert_busy_timeout_max_ms, 200, R"( Maximum time to wait before dumping collected data per query since the first data appeared. 
)", 0) ALIAS(async_insert_busy_timeout_ms) \ - M(Double, async_insert_busy_timeout_increase_rate, 0.2, R"( + DECLARE(Double, async_insert_busy_timeout_increase_rate, 0.2, R"( The exponential growth rate at which the adaptive asynchronous insert timeout increases )", 0) \ - M(Double, async_insert_busy_timeout_decrease_rate, 0.2, R"( + DECLARE(Double, async_insert_busy_timeout_decrease_rate, 0.2, R"( The exponential growth rate at which the adaptive asynchronous insert timeout decreases )", 0) \ \ - M(UInt64, remote_fs_read_max_backoff_ms, 10000, R"( + DECLARE(UInt64, remote_fs_read_max_backoff_ms, 10000, R"( Max wait time when trying to read data for remote disk )", 0) \ - M(UInt64, remote_fs_read_backoff_max_tries, 5, R"( + DECLARE(UInt64, remote_fs_read_backoff_max_tries, 5, R"( Max attempts to read with backoff )", 0) \ - M(Bool, enable_filesystem_cache, true, R"( + DECLARE(Bool, enable_filesystem_cache, true, R"( Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended )", 0) \ - M(String, filesystem_cache_name, "", R"( + DECLARE(String, filesystem_cache_name, "", R"( Filesystem cache name to use for stateless table engines or data lakes )", 0) \ - M(Bool, enable_filesystem_cache_on_write_operations, false, R"( + DECLARE(Bool, enable_filesystem_cache_on_write_operations, false, R"( Write into cache on write operations. 
To actually work this setting requires be added to disk config too )", 0) \ - M(Bool, enable_filesystem_cache_log, false, R"( + DECLARE(Bool, enable_filesystem_cache_log, false, R"( Allows to record the filesystem caching log for each query )", 0) \ - M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, R"( + DECLARE(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, R"( Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, but don't put more entries into the cache. If you set this setting for heavy ad-hoc queries and leave it disabled for short real-time queries, this will allows to avoid cache threshing by too heavy queries and to improve the overall system efficiency. )", 0) \ - M(Bool, skip_download_if_exceeds_query_cache, true, R"( + DECLARE(Bool, skip_download_if_exceeds_query_cache, true, R"( Skip download from remote filesystem if exceeds query cache size )", 0) \ - M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), R"( + DECLARE(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), R"( Max remote filesystem cache size that can be downloaded by a single query )", 0) \ - M(Bool, throw_on_error_from_cache_on_write_operations, false, R"( + DECLARE(Bool, throw_on_error_from_cache_on_write_operations, false, R"( Ignore error from cache when caching on write operations (INSERT, merges) )", 0) \ - M(UInt64, filesystem_cache_segments_batch_size, 20, R"( + DECLARE(UInt64, filesystem_cache_segments_batch_size, 20, R"( Limit on size of a single batch of file segments that a read buffer can request from cache. 
Too low value will lead to excessive requests to cache, too large may slow down eviction from cache )", 0) \ - M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, R"( + DECLARE(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, R"( Wait time to lock cache for space reservation in filesystem cache )", 0) \ - M(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), R"( + DECLARE(UInt64, temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds, (10 * 60 * 1000), R"( Wait time to lock cache for space reservation for temporary data in filesystem cache )", 0) \ \ - M(Bool, use_page_cache_for_disks_without_file_cache, false, R"( + DECLARE(Bool, use_page_cache_for_disks_without_file_cache, false, R"( Use userspace page cache for remote disks that don't have filesystem cache enabled. )", 0) \ - M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, R"( + DECLARE(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, R"( Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache. )", 0) \ - M(Bool, page_cache_inject_eviction, false, R"( + DECLARE(Bool, page_cache_inject_eviction, false, R"( Userspace page cache will sometimes invalidate some pages at random. Intended for testing. )", 0) \ \ - M(Bool, load_marks_asynchronously, false, R"( + DECLARE(Bool, load_marks_asynchronously, false, R"( Load MergeTree marks asynchronously )", 0) \ - M(Bool, enable_filesystem_read_prefetches_log, false, R"( + DECLARE(Bool, enable_filesystem_read_prefetches_log, false, R"( Log to system.filesystem prefetch_log during query. 
Should be used only for testing or debugging, not recommended to be turned on by default )", 0) \ - M(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, R"( + DECLARE(Bool, allow_prefetched_read_pool_for_remote_filesystem, true, R"( Prefer prefetched threadpool if all parts are on remote filesystem )", 0) \ - M(Bool, allow_prefetched_read_pool_for_local_filesystem, false, R"( + DECLARE(Bool, allow_prefetched_read_pool_for_local_filesystem, false, R"( Prefer prefetched threadpool if all parts are on local filesystem )", 0) \ \ - M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, R"( + DECLARE(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, R"( The maximum size of the prefetch buffer to read from the filesystem. )", 0) \ - M(UInt64, filesystem_prefetch_step_bytes, 0, R"( + DECLARE(UInt64, filesystem_prefetch_step_bytes, 0, R"( Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task )", 0) \ - M(UInt64, filesystem_prefetch_step_marks, 0, R"( + DECLARE(UInt64, filesystem_prefetch_step_marks, 0, R"( Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task )", 0) \ - M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", R"( + DECLARE(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", R"( Maximum memory usage for prefetches. )", 0) \ - M(UInt64, filesystem_prefetches_limit, 200, R"( + DECLARE(UInt64, filesystem_prefetches_limit, 200, R"( Maximum number of prefetches. Zero means unlimited. 
A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches )", 0) \ \ - M(UInt64, use_structure_from_insertion_table_in_table_functions, 2, R"( + DECLARE(UInt64, use_structure_from_insertion_table_in_table_functions, 2, R"( Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto )", 0) \ \ - M(UInt64, http_max_tries, 10, R"( + DECLARE(UInt64, http_max_tries, 10, R"( Max attempts to read via http. )", 0) \ - M(UInt64, http_retry_initial_backoff_ms, 100, R"( + DECLARE(UInt64, http_retry_initial_backoff_ms, 100, R"( Min milliseconds for backoff, when retrying read via http )", 0) \ - M(UInt64, http_retry_max_backoff_ms, 10000, R"( + DECLARE(UInt64, http_retry_max_backoff_ms, 10000, R"( Max milliseconds for backoff, when retrying read via http )", 0) \ \ - M(Bool, force_remove_data_recursively_on_drop, false, R"( + DECLARE(Bool, force_remove_data_recursively_on_drop, false, R"( Recursively remove data on DROP query. 
Avoids 'Directory not empty' error, but may silently remove detached data )", 0) \ - M(Bool, check_table_dependencies, true, R"( + DECLARE(Bool, check_table_dependencies, true, R"( Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies )", 0) \ - M(Bool, check_referential_table_dependencies, false, R"( + DECLARE(Bool, check_referential_table_dependencies, false, R"( Check that DDL query (such as DROP TABLE or RENAME) will not break referential dependencies )", 0) \ - M(Bool, use_local_cache_for_remote_storage, true, R"( + DECLARE(Bool, use_local_cache_for_remote_storage, true, R"( Use local cache for remote storage like HDFS or S3, it's used for remote table engine only )", 0) \ \ - M(Bool, allow_unrestricted_reads_from_keeper, false, R"( + DECLARE(Bool, allow_unrestricted_reads_from_keeper, false, R"( Allow unrestricted (without condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper )", 0) \ - M(Bool, allow_deprecated_database_ordinary, false, R"( + DECLARE(Bool, allow_deprecated_database_ordinary, false, R"( Allow to create databases with deprecated Ordinary engine )", 0) \ - M(Bool, allow_deprecated_syntax_for_merge_tree, false, R"( + DECLARE(Bool, allow_deprecated_syntax_for_merge_tree, false, R"( Allow to create *MergeTree tables with deprecated engine definition syntax )", 0) \ - M(Bool, allow_asynchronous_read_from_io_pool_for_merge_tree, false, R"( + DECLARE(Bool, allow_asynchronous_read_from_io_pool_for_merge_tree, false, R"( Use background I/O pool to read from MergeTree tables. This setting may increase performance for I/O bound queries )", 0) \ - M(UInt64, max_streams_for_merge_tree_reading, 0, R"( + DECLARE(UInt64, max_streams_for_merge_tree_reading, 0, R"( If is not zero, limit the number of reading streams for MergeTree table. 
)", 0) \ \ - M(Bool, force_grouping_standard_compatibility, true, R"( + DECLARE(Bool, force_grouping_standard_compatibility, true, R"( Make GROUPING function to return 1 when argument is not used as an aggregation key )", 0) \ \ - M(Bool, schema_inference_use_cache_for_file, true, R"( + DECLARE(Bool, schema_inference_use_cache_for_file, true, R"( Use cache in schema inference while using file table function )", 0) \ - M(Bool, schema_inference_use_cache_for_s3, true, R"( + DECLARE(Bool, schema_inference_use_cache_for_s3, true, R"( Use cache in schema inference while using s3 table function )", 0) \ - M(Bool, schema_inference_use_cache_for_azure, true, R"( + DECLARE(Bool, schema_inference_use_cache_for_azure, true, R"( Use cache in schema inference while using azure table function )", 0) \ - M(Bool, schema_inference_use_cache_for_hdfs, true, R"( + DECLARE(Bool, schema_inference_use_cache_for_hdfs, true, R"( Use cache in schema inference while using hdfs table function )", 0) \ - M(Bool, schema_inference_use_cache_for_url, true, R"( + DECLARE(Bool, schema_inference_use_cache_for_url, true, R"( Use cache in schema inference while using url table function )", 0) \ - M(Bool, schema_inference_cache_require_modification_time_for_url, true, R"( + DECLARE(Bool, schema_inference_cache_require_modification_time_for_url, true, R"( Use schema from cache for URL with last modification time validation (for URLs with Last-Modified header) )", 0) \ \ - M(String, compatibility, "", R"( + DECLARE(String, compatibility, "", R"( The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting. If settings are set to non-default values, then those settings are honored (only settings that have not been modified are affected by the `compatibility` setting). 
@@ -4968,7 +4958,7 @@ In ClickHouse Cloud the compatibility setting must be set by ClickHouse Cloud su ::: )", 0) \ \ - M(Map, additional_table_filters, "", R"( + DECLARE(Map, additional_table_filters, "", R"( An additional filter expression that is applied after reading from the specified table. @@ -4999,7 +4989,7 @@ SETTINGS additional_table_filters = {'table_1': 'x != 2'} └───┴──────┘ ``` )", 0) \ - M(String, additional_result_filter, "", R"( + DECLARE(String, additional_result_filter, "", R"( An additional filter expression to apply to the result of `SELECT` query. This setting is not applied to any subquery. @@ -5031,14 +5021,14 @@ SETTINGS additional_result_filter = 'x != 2' ``` )", 0) \ \ - M(String, workload, "default", R"( + DECLARE(String, workload, "default", R"( Name of workload to be used to access resources )", 0) \ - M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, R"( + DECLARE(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, R"( Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users. )", 0) \ \ - M(String, rename_files_after_processing, "", R"( + DECLARE(String, rename_files_after_processing, "", R"( - **Type:** String - **Default value:** Empty string @@ -5063,53 +5053,53 @@ If reading `sample.csv` is successful, file will be renamed to `processed_sample )", 0) \ \ /* CLOUD ONLY */ \ - M(Bool, read_through_distributed_cache, false, R"( + DECLARE(Bool, read_through_distributed_cache, false, R"( Only in ClickHouse Cloud. Allow reading from distributed cache )", 0) \ - M(Bool, write_through_distributed_cache, false, R"( + DECLARE(Bool, write_through_distributed_cache, false, R"( Only in ClickHouse Cloud. 
Allow writing to distributed cache (writing to s3 will also be done by distributed cache) )", 0) \ - M(Bool, distributed_cache_throw_on_error, false, R"( + DECLARE(Bool, distributed_cache_throw_on_error, false, R"( Only in ClickHouse Cloud. Rethrow exception happened during communication with distributed cache or exception received from distributed cache. Otherwise fallback to skipping distributed cache on error )", 0) \ - M(DistributedCacheLogMode, distributed_cache_log_mode, DistributedCacheLogMode::LOG_ON_ERROR, R"( + DECLARE(DistributedCacheLogMode, distributed_cache_log_mode, DistributedCacheLogMode::LOG_ON_ERROR, R"( Only in ClickHouse Cloud. Mode for writing to system.distributed_cache_log )", 0) \ - M(Bool, distributed_cache_fetch_metrics_only_from_current_az, true, R"( + DECLARE(Bool, distributed_cache_fetch_metrics_only_from_current_az, true, R"( Only in ClickHouse Cloud. Fetch metrics only from current availability zone in system.distributed_cache_metrics, system.distributed_cache_events )", 0) \ - M(UInt64, distributed_cache_connect_max_tries, 100, R"( + DECLARE(UInt64, distributed_cache_connect_max_tries, 100, R"( Only in ClickHouse Cloud. Number of tries to connect to distributed cache if unsuccessful )", 0) \ - M(UInt64, distributed_cache_receive_response_wait_milliseconds, 60000, R"( + DECLARE(UInt64, distributed_cache_receive_response_wait_milliseconds, 60000, R"( Only in ClickHouse Cloud. Wait time in milliseconds to receive data for request from distributed cache )", 0) \ - M(UInt64, distributed_cache_receive_timeout_milliseconds, 10000, R"( + DECLARE(UInt64, distributed_cache_receive_timeout_milliseconds, 10000, R"( Only in ClickHouse Cloud. Wait time in milliseconds to receive any kind of response from distributed cache )", 0) \ - M(UInt64, distributed_cache_wait_connection_from_pool_milliseconds, 100, R"( + DECLARE(UInt64, distributed_cache_wait_connection_from_pool_milliseconds, 100, R"( Only in ClickHouse Cloud. 
Wait time in milliseconds to receive connection from connection pool if distributed_cache_pool_behaviour_on_limit is wait )", 0) \ - M(Bool, distributed_cache_bypass_connection_pool, false, R"( + DECLARE(Bool, distributed_cache_bypass_connection_pool, false, R"( Only in ClickHouse Cloud. Allow to bypass distributed cache connection pool )", 0) \ - M(DistributedCachePoolBehaviourOnLimit, distributed_cache_pool_behaviour_on_limit, DistributedCachePoolBehaviourOnLimit::ALLOCATE_NEW_BYPASSING_POOL, R"( + DECLARE(DistributedCachePoolBehaviourOnLimit, distributed_cache_pool_behaviour_on_limit, DistributedCachePoolBehaviourOnLimit::ALLOCATE_NEW_BYPASSING_POOL, R"( Only in ClickHouse Cloud. Identifies behaviour of distributed cache connection on pool limit reached )", 0) \ - M(UInt64, distributed_cache_read_alignment, 0, R"( + DECLARE(UInt64, distributed_cache_read_alignment, 0, R"( Only in ClickHouse Cloud. A setting for testing purposes, do not change it )", 0) \ - M(UInt64, distributed_cache_max_unacked_inflight_packets, DistributedCache::MAX_UNACKED_INFLIGHT_PACKETS, R"( + DECLARE(UInt64, distributed_cache_max_unacked_inflight_packets, DistributedCache::MAX_UNACKED_INFLIGHT_PACKETS, R"( Only in ClickHouse Cloud. A maximum number of unacknowledged in-flight packets in a single distributed cache read request )", 0) \ - M(UInt64, distributed_cache_data_packet_ack_window, DistributedCache::ACK_DATA_PACKET_WINDOW, R"( + DECLARE(UInt64, distributed_cache_data_packet_ack_window, DistributedCache::ACK_DATA_PACKET_WINDOW, R"( Only in ClickHouse Cloud. A window for sending ACK for DataPacket sequence in a single distributed cache read request )", 0) \ \ - M(Bool, parallelize_output_from_storages, true, R"( + DECLARE(Bool, parallelize_output_from_storages, true, R"( Parallelize output for reading step from storage. 
It allows parallelization of query processing right after reading from storage if possible )", 0) \ - M(String, insert_deduplication_token, "", R"( + DECLARE(String, insert_deduplication_token, "", R"( The setting allows a user to provide own deduplication semantic in MergeTree/ReplicatedMergeTree For example, by providing a unique value for the setting in each INSERT statement, user can avoid the same inserted data being deduplicated. @@ -5155,31 +5145,31 @@ SELECT * FROM test_table └───┘ ``` )", 0) \ - M(Bool, count_distinct_optimization, false, R"( + DECLARE(Bool, count_distinct_optimization, false, R"( Rewrite count distinct to subquery of group by )", 0) \ - M(Bool, throw_if_no_data_to_insert, true, R"( + DECLARE(Bool, throw_if_no_data_to_insert, true, R"( Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert). Only applies to INSERTs using [`clickhouse-client`](/docs/en/interfaces/cli) or using the [gRPC interface](/docs/en/interfaces/grpc). )", 0) \ - M(Bool, compatibility_ignore_auto_increment_in_create_table, false, R"( + DECLARE(Bool, compatibility_ignore_auto_increment_in_create_table, false, R"( Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. 
It simplifies migration from MySQL )", 0) \ - M(Bool, multiple_joins_try_to_keep_original_names, false, R"( + DECLARE(Bool, multiple_joins_try_to_keep_original_names, false, R"( Do not add aliases to top level expression list on multiple joins rewrite )", 0) \ - M(Bool, optimize_sorting_by_input_stream_properties, true, R"( + DECLARE(Bool, optimize_sorting_by_input_stream_properties, true, R"( Optimize sorting by sorting properties of input stream )", 0) \ - M(UInt64, keeper_max_retries, 10, R"( + DECLARE(UInt64, keeper_max_retries, 10, R"( Max retries for general keeper operations )", 0) \ - M(UInt64, keeper_retry_initial_backoff_ms, 100, R"( + DECLARE(UInt64, keeper_retry_initial_backoff_ms, 100, R"( Initial backoff timeout for general keeper operations )", 0) \ - M(UInt64, keeper_retry_max_backoff_ms, 5000, R"( + DECLARE(UInt64, keeper_retry_max_backoff_ms, 5000, R"( Max backoff timeout for general keeper operations )", 0) \ - M(UInt64, insert_keeper_max_retries, 20, R"( + DECLARE(UInt64, insert_keeper_max_retries, 20, R"( The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries. Possible values: @@ -5199,7 +5189,7 @@ For example, if `insert_keeper_retry_initial_backoff_ms=100`, `insert_keeper_ret Apart from fault tolerance, the retries aim to provide a better user experience - they allow to avoid returning an error during INSERT execution if Keeper is restarted, for example, due to an upgrade. )", 0) \ - M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, R"( + DECLARE(UInt64, insert_keeper_retry_initial_backoff_ms, 100, R"( Initial timeout(in milliseconds) to retry a failed Keeper request during INSERT query execution Possible values: @@ -5207,7 +5197,7 @@ Possible values: - Positive integer. 
- 0 — No timeout )", 0) \ - M(UInt64, insert_keeper_retry_max_backoff_ms, 10000, R"( + DECLARE(UInt64, insert_keeper_retry_max_backoff_ms, 10000, R"( Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution Possible values: @@ -5215,19 +5205,19 @@ Possible values: - Positive integer. - 0 — Maximum timeout is not limited )", 0) \ - M(Float, insert_keeper_fault_injection_probability, 0.0f, R"( + DECLARE(Float, insert_keeper_fault_injection_probability, 0.0f, R"( Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f] )", 0) \ - M(UInt64, insert_keeper_fault_injection_seed, 0, R"( + DECLARE(UInt64, insert_keeper_fault_injection_seed, 0, R"( 0 - random seed, otherwise the setting value )", 0) \ - M(Bool, force_aggregation_in_order, false, R"( + DECLARE(Bool, force_aggregation_in_order, false, R"( The setting is used by the server itself to support distributed queries. Do not change it manually, because it will break normal operations. (Forces use of aggregation in order on remote nodes during distributed aggregation). )", IMPORTANT) \ - M(UInt64, http_max_request_param_data_size, 10_MiB, R"( + DECLARE(UInt64, http_max_request_param_data_size, 10_MiB, R"( Limit on size of request data used as a query parameter in predefined HTTP requests. )", 0) \ - M(Bool, function_json_value_return_type_allow_nullable, false, R"( + DECLARE(Bool, function_json_value_return_type_allow_nullable, false, R"( Control whether allow to return `NULL` when value is not exist for JSON_VALUE function. ```sql @@ -5245,7 +5235,7 @@ Possible values: - true — Allow. - false — Disallow. )", 0) \ - M(Bool, function_json_value_return_type_allow_complex, false, R"( + DECLARE(Bool, function_json_value_return_type_allow_complex, false, R"( Control whether allow to return complex type (such as: struct, array, map) for json_value function. ```sql @@ -5263,13 +5253,13 @@ Possible values: - true — Allow. 
- false — Disallow. )", 0) \ - M(Bool, use_with_fill_by_sorting_prefix, true, R"( + DECLARE(Bool, use_with_fill_by_sorting_prefix, true, R"( Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently )", 0) \ - M(Bool, optimize_uniq_to_count, true, R"( + DECLARE(Bool, optimize_uniq_to_count, true, R"( Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause. )", 0) \ - M(Bool, use_variant_as_common_type, false, R"( + DECLARE(Bool, use_variant_as_common_type, false, R"( Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types. Example: @@ -5348,7 +5338,7 @@ SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as └───────────────────────────────┘ ``` )", 0) \ - M(Bool, enable_order_by_all, true, R"( + DECLARE(Bool, enable_order_by_all, true, R"( Enables or disables sorting with `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md). Possible values: @@ -5380,34 +5370,34 @@ Result: └────┴────┴─────┘ ``` )", 0) \ - M(Float, ignore_drop_queries_probability, 0, R"( + DECLARE(Float, ignore_drop_queries_probability, 0, R"( If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replcase DROP to TRUNCATE). 
Used for testing purposes )", 0) \ - M(Bool, traverse_shadow_remote_data_paths, false, R"( + DECLARE(Bool, traverse_shadow_remote_data_paths, false, R"( Traverse frozen data (shadow directory) in addition to actual table data when query system.remote_data_paths )", 0) \ - M(Bool, geo_distance_returns_float64_on_float64_arguments, true, R"( + DECLARE(Bool, geo_distance_returns_float64_on_float64_arguments, true, R"( If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32. )", 0) \ - M(Bool, allow_get_client_http_header, false, R"( + DECLARE(Bool, allow_get_client_http_header, false, R"( Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function. )", 0) \ - M(Bool, cast_string_to_dynamic_use_inference, false, R"( + DECLARE(Bool, cast_string_to_dynamic_use_inference, false, R"( Use types inference during String to Dynamic conversion )", 0) \ - M(Bool, enable_blob_storage_log, true, R"( + DECLARE(Bool, enable_blob_storage_log, true, R"( Write information about blob storage operations to system.blob_storage_log table )", 0) \ - M(Bool, use_json_alias_for_old_object_type, false, R"( + DECLARE(Bool, use_json_alias_for_old_object_type, false, R"( When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/json.md) type instead of the new [JSON](../../sql-reference/data-types/newjson.md) type. 
)", 0) \ - M(Bool, allow_create_index_without_type, false, R"( + DECLARE(Bool, allow_create_index_without_type, false, R"( Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests. )", 0) \ - M(Bool, create_index_ignore_unique, false, R"( + DECLARE(Bool, create_index_ignore_unique, false, R"( Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests. )", 0) \ - M(Bool, print_pretty_type_names, true, R"( + DECLARE(Bool, print_pretty_type_names, true, R"( Allows to print deep-nested type names in a pretty way with indents in `DESCRIBE` query and in `toTypeName()` function. Example: @@ -5439,99 +5429,99 @@ a Tuple( ) ``` )", 0) \ - M(Bool, create_table_empty_primary_key_by_default, false, R"( + DECLARE(Bool, create_table_empty_primary_key_by_default, false, R"( Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified )", 0) \ - M(Bool, allow_named_collection_override_by_default, true, R"( + DECLARE(Bool, allow_named_collection_override_by_default, true, R"( Allow named collections' fields override by default. )", 0) \ - M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, R"( + DECLARE(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, R"( Allows to set default `SQL SECURITY` option while creating a normal view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). The default value is `INVOKER`. )", 0) \ - M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, R"( + DECLARE(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, R"( Allows to set a default value for SQL SECURITY option when creating a materialized view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). The default value is `DEFINER`. 
)", 0) \ - M(String, default_view_definer, "CURRENT_USER", R"( + DECLARE(String, default_view_definer, "CURRENT_USER", R"( Allows to set default `DEFINER` option while creating a view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security). The default value is `CURRENT_USER`. )", 0) \ - M(UInt64, cache_warmer_threads, 4, R"( + DECLARE(UInt64, cache_warmer_threads, 4, R"( Only available in ClickHouse Cloud. Number of background threads for speculatively downloading new data parts into file cache, when cache_populated_by_fetch is enabled. Zero to disable. )", 0) \ - M(Int64, ignore_cold_parts_seconds, 0, R"( + DECLARE(Int64, ignore_cold_parts_seconds, 0, R"( Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree. )", 0) \ - M(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"( + DECLARE(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"( Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm. )", 0) \ - M(Bool, iceberg_engine_ignore_schema_evolution, false, R"( + DECLARE(Bool, iceberg_engine_ignore_schema_evolution, false, R"( Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation. 
:::note Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. ::: )", 0) \ - M(Bool, allow_deprecated_error_prone_window_functions, false, R"( + DECLARE(Bool, allow_deprecated_error_prone_window_functions, false, R"( Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference) )", 0) \ - M(Bool, allow_deprecated_snowflake_conversion_functions, false, R"( + DECLARE(Bool, allow_deprecated_snowflake_conversion_functions, false, R"( Functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are deprecated and disabled by default. Please use functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` instead. To re-enable the deprecated functions (e.g., during a transition period), please set this setting to `true`. )", 0) \ - M(Bool, optimize_distinct_in_order, true, R"( + DECLARE(Bool, optimize_distinct_in_order, true, R"( Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement )", 0) \ - M(Bool, keeper_map_strict_mode, false, R"( + DECLARE(Bool, keeper_map_strict_mode, false, R"( Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key )", 0) \ - M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, R"( + DECLARE(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, R"( Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory. )", 0) ALIAS(extract_kvp_max_pairs_per_row) \ - M(Bool, restore_replace_external_engines_to_null, false, R"( + DECLARE(Bool, restore_replace_external_engines_to_null, false, R"( For testing purposes. 
Replaces all external engines to Null to not initiate external connections. )", 0) \ - M(Bool, restore_replace_external_table_functions_to_null, false, R"( + DECLARE(Bool, restore_replace_external_table_functions_to_null, false, R"( For testing purposes. Replaces all external table functions to Null to not initiate external connections. )", 0) \ - M(Bool, restore_replace_external_dictionary_source_to_null, false, R"( + DECLARE(Bool, restore_replace_external_dictionary_source_to_null, false, R"( Replace external dictionary sources to Null on restore. Useful for testing purposes )", 0) \ - M(Bool, create_if_not_exists, false, R"( + DECLARE(Bool, create_if_not_exists, false, R"( Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown. )", 0) \ - M(Bool, enforce_strict_identifier_format, false, R"( + DECLARE(Bool, enforce_strict_identifier_format, false, R"( If enabled, only allow identifiers containing alphanumeric characters and underscores. )", 0) \ - M(Bool, mongodb_throw_on_unsupported_query, true, R"( + DECLARE(Bool, mongodb_throw_on_unsupported_query, true, R"( If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'. )", 0) \ \ /* ###################################### */ \ /* ######## EXPERIMENTAL FEATURES ####### */ \ /* ###################################### */ \ - M(Bool, allow_experimental_materialized_postgresql_table, false, R"( + DECLARE(Bool, allow_experimental_materialized_postgresql_table, false, R"( Allows to use the MaterializedPostgreSQL table engine. 
Disabled by default, because this feature is experimental )", 0) \ - M(Bool, allow_experimental_funnel_functions, false, R"( + DECLARE(Bool, allow_experimental_funnel_functions, false, R"( Enable experimental functions for funnel analysis. )", 0) \ - M(Bool, allow_experimental_nlp_functions, false, R"( + DECLARE(Bool, allow_experimental_nlp_functions, false, R"( Enable experimental functions for natural language processing. )", 0) \ - M(Bool, allow_experimental_hash_functions, false, R"( + DECLARE(Bool, allow_experimental_hash_functions, false, R"( Enable experimental hash functions )", 0) \ - M(Bool, allow_experimental_object_type, false, R"( + DECLARE(Bool, allow_experimental_object_type, false, R"( Allow Object and JSON data types )", 0) \ - M(Bool, allow_experimental_time_series_table, false, R"( + DECLARE(Bool, allow_experimental_time_series_table, false, R"( Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine. Possible values: @@ -5539,55 +5529,55 @@ Possible values: - 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled. - 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled. )", 0) \ - M(Bool, allow_experimental_vector_similarity_index, false, R"( + DECLARE(Bool, allow_experimental_vector_similarity_index, false, R"( Allow experimental vector similarity index )", 0) \ - M(Bool, allow_experimental_variant_type, false, R"( + DECLARE(Bool, allow_experimental_variant_type, false, R"( Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). 
)", 0) \ - M(Bool, allow_experimental_dynamic_type, false, R"( + DECLARE(Bool, allow_experimental_dynamic_type, false, R"( Allow Dynamic data type )", 0) \ - M(Bool, allow_experimental_json_type, false, R"( + DECLARE(Bool, allow_experimental_json_type, false, R"( Allow JSON data type )", 0) \ - M(Bool, allow_experimental_codecs, false, R"( + DECLARE(Bool, allow_experimental_codecs, false, R"( If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing). )", 0) \ - M(Bool, allow_experimental_shared_set_join, true, R"( + DECLARE(Bool, allow_experimental_shared_set_join, true, R"( Only in ClickHouse Cloud. Allow to create ShareSet and SharedJoin )", 0) \ - M(UInt64, max_limit_for_ann_queries, 1'000'000, R"( + DECLARE(UInt64, max_limit_for_ann_queries, 1'000'000, R"( SELECT queries with LIMIT bigger than this setting cannot use vector similarity indexes. Helps to prevent memory overflows in vector similarity indexes. )", 0) \ - M(UInt64, hnsw_candidate_list_size_for_search, 256, R"( + DECLARE(UInt64, hnsw_candidate_list_size_for_search, 256, R"( The size of the dynamic candidate list when searching the vector similarity index, also known as 'ef_search'. 
)", 0) \ - M(Bool, throw_on_unsupported_query_inside_transaction, true, R"( + DECLARE(Bool, throw_on_unsupported_query_inside_transaction, true, R"( Throw exception if unsupported query is used inside transaction )", 0) \ - M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, R"( + DECLARE(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, R"( Wait for committed changes to become actually visible in the latest snapshot )", 0) \ - M(Bool, implicit_transaction, false, R"( + DECLARE(Bool, implicit_transaction, false, R"( If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback) )", 0) \ - M(UInt64, grace_hash_join_initial_buckets, 1, R"( + DECLARE(UInt64, grace_hash_join_initial_buckets, 1, R"( Initial number of grace hash join buckets )", 0) \ - M(UInt64, grace_hash_join_max_buckets, 1024, R"( + DECLARE(UInt64, grace_hash_join_max_buckets, 1024, R"( Limit on the number of grace hash join buckets )", 0) \ - M(UInt64, join_to_sort_minimum_perkey_rows, 40, R"( + DECLARE(UInt64, join_to_sort_minimum_perkey_rows, 40, R"( The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys )", 0) \ - M(UInt64, join_to_sort_maximum_table_rows, 10000, R"( + DECLARE(UInt64, join_to_sort_maximum_table_rows, 10000, R"( The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join. 
)", 0) \ - M(Bool, allow_experimental_join_right_table_sorting, false, R"( + DECLARE(Bool, allow_experimental_join_right_table_sorting, false, R"( If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join. )", 0) \ - M(Timezone, session_timezone, "", R"( + DECLARE(Timezone, session_timezone, "", R"( Sets the implicit time zone of the current session or query. The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. The setting takes precedence over the globally configured (server-level) implicit time zone. @@ -5647,22 +5637,22 @@ This happens due to different parsing pipelines: - [timezone](../server-configuration-parameters/settings.md#timezone) )", 0) \ - M(Bool, use_hive_partitioning, false, R"( + DECLARE(Bool, use_hive_partitioning, false, R"( When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. 
)", 0)\ \ - M(Bool, allow_statistics_optimize, false, R"( + DECLARE(Bool, allow_statistics_optimize, false, R"( Allows using statistics to optimize queries )", 0) ALIAS(allow_statistic_optimize) \ - M(Bool, allow_experimental_statistics, false, R"( + DECLARE(Bool, allow_experimental_statistics, false, R"( Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics). )", 0) ALIAS(allow_experimental_statistic) \ \ /* Parallel replicas */ \ - M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, R"( + DECLARE(UInt64, allow_experimental_parallel_reading_from_replicas, 0, R"( Use up to `max_parallel_replicas` the number of replicas from each shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure )", 0) ALIAS(enable_parallel_replicas) \ - M(NonZeroUInt64, max_parallel_replicas, 1, R"( + DECLARE(NonZeroUInt64, max_parallel_replicas, 1, R"( The maximum number of replicas for each shard when executing a query. Possible values: @@ -5690,16 +5680,16 @@ A query may be processed faster if it is executed on several servers in parallel This setting is useful for any replicated table. )", 0) \ - M(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::READ_TASKS, R"( + DECLARE(ParallelReplicasMode, parallel_replicas_mode, ParallelReplicasMode::READ_TASKS, R"( Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key. 
)", 0) \ - M(UInt64, parallel_replicas_count, 0, R"( + DECLARE(UInt64, parallel_replicas_count, 0, R"( This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing. )", 0) \ - M(UInt64, parallel_replica_offset, 0, R"( + DECLARE(UInt64, parallel_replica_offset, 0, R"( This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas. )", 0) \ - M(String, parallel_replicas_custom_key, "", R"( + DECLARE(String, parallel_replicas_custom_key, "", R"( An arbitrary integer expression that can be used to split work between replicas for a specific table. The value can be any integer expression. @@ -5708,67 +5698,67 @@ Simple expressions using primary keys are preferred. If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. Otherwise, it will behave same as for `SAMPLE` key, it will use multiple replicas of each shard. )", 0) \ - M(UInt64, parallel_replicas_custom_key_range_lower, 0, R"( + DECLARE(UInt64, parallel_replicas_custom_key_range_lower, 0, R"( Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`. When used in conjunction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. 
Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing. )", 0) \ - M(UInt64, parallel_replicas_custom_key_range_upper, 0, R"( + DECLARE(UInt64, parallel_replicas_custom_key_range_upper, 0, R"( Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it the max value of the custom key expression. When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`. Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing )", 0) \ - M(String, cluster_for_parallel_replicas, "", R"( + DECLARE(String, cluster_for_parallel_replicas, "", R"( Cluster for a shard in which current server is located )", 0) \ - M(Bool, parallel_replicas_allow_in_with_subquery, true, R"( + DECLARE(Bool, parallel_replicas_allow_in_with_subquery, true, R"( If true, subquery for IN will be executed on every follower replica. )", 0) \ - M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, R"( + DECLARE(Float, parallel_replicas_single_task_marks_count_multiplier, 2, R"( A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas. 
)", 0) \ - M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, R"( + DECLARE(Bool, parallel_replicas_for_non_replicated_merge_tree, false, R"( If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables )", 0) \ - M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, R"( + DECLARE(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, R"( Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas' )", 0) \ - M(Bool, parallel_replicas_prefer_local_join, true, R"( + DECLARE(Bool, parallel_replicas_prefer_local_join, true, R"( If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN. )", 0) \ - M(UInt64, parallel_replicas_mark_segment_size, 0, R"( + DECLARE(UInt64, parallel_replicas_mark_segment_size, 0, R"( Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing. Value should be in range [128; 16384] )", 0) \ - M(Bool, allow_archive_path_syntax, true, R"( + DECLARE(Bool, allow_archive_path_syntax, true, R"( File/S3 engines/table function will parse paths with '::' as '\\ :: \\' if archive has correct extension )", 0) \ - M(Bool, parallel_replicas_local_plan, false, R"( + DECLARE(Bool, parallel_replicas_local_plan, false, R"( Build local plan for local replica )", 0) \ \ - M(Bool, allow_experimental_inverted_index, false, R"( + DECLARE(Bool, allow_experimental_inverted_index, false, R"( If it is set to true, allow to use experimental inverted index. 
)", 0) \ - M(Bool, allow_experimental_full_text_index, false, R"( + DECLARE(Bool, allow_experimental_full_text_index, false, R"( If it is set to true, allow to use experimental full-text index. )", 0) \ \ - M(Bool, allow_experimental_join_condition, false, R"( + DECLARE(Bool, allow_experimental_join_condition, false, R"( Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y. )", 0) \ \ - M(Bool, allow_experimental_analyzer, true, R"( + DECLARE(Bool, allow_experimental_analyzer, true, R"( Allow new query analyzer. )", IMPORTANT) ALIAS(enable_analyzer) \ - M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, R"( + DECLARE(Bool, analyzer_compatibility_join_using_top_level_identifier, false, R"( Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`). )", 0) \ \ - M(Bool, allow_experimental_live_view, false, R"( + DECLARE(Bool, allow_experimental_live_view, false, R"( Allows creation of a deprecated LIVE VIEW. Possible values: @@ -5776,48 +5766,48 @@ Possible values: - 0 — Working with live views is disabled. - 1 — Working with live views is enabled. )", 0) \ - M(Seconds, live_view_heartbeat_interval, 15, R"( + DECLARE(Seconds, live_view_heartbeat_interval, 15, R"( The heartbeat interval in seconds to indicate live query is alive. )", 0) \ - M(UInt64, max_live_view_insert_blocks_before_refresh, 64, R"( + DECLARE(UInt64, max_live_view_insert_blocks_before_refresh, 64, R"( Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed. )", 0) \ \ - M(Bool, allow_experimental_window_view, false, R"( + DECLARE(Bool, allow_experimental_window_view, false, R"( Enable WINDOW VIEW. Not mature enough. 
)", 0) \ - M(Seconds, window_view_clean_interval, 60, R"( + DECLARE(Seconds, window_view_clean_interval, 60, R"( The clean interval of window view in seconds to free outdated data. )", 0) \ - M(Seconds, window_view_heartbeat_interval, 15, R"( + DECLARE(Seconds, window_view_heartbeat_interval, 15, R"( The heartbeat interval in seconds to indicate watch query is alive. )", 0) \ - M(Seconds, wait_for_window_view_fire_signal_timeout, 10, R"( + DECLARE(Seconds, wait_for_window_view_fire_signal_timeout, 10, R"( Timeout for waiting for window view fire signal in event time processing )", 0) \ \ - M(Bool, stop_refreshable_materialized_views_on_startup, false, R"( + DECLARE(Bool, stop_refreshable_materialized_views_on_startup, false, R"( On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW \\ afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views. )", 0) \ \ - M(Bool, allow_experimental_database_materialized_mysql, false, R"( + DECLARE(Bool, allow_experimental_database_materialized_mysql, false, R"( Allow to create database with Engine=MaterializedMySQL(...). )", 0) \ - M(Bool, allow_experimental_database_materialized_postgresql, false, R"( + DECLARE(Bool, allow_experimental_database_materialized_postgresql, false, R"( Allow to create database with Engine=MaterializedPostgreSQL(...). )", 0) \ \ /** Experimental feature for moving data between shards. */ \ - M(Bool, allow_experimental_query_deduplication, false, R"( + DECLARE(Bool, allow_experimental_query_deduplication, false, R"( Experimental data deduplication for SELECT queries based on part UUIDs )", 0) \ - M(Bool, implicit_select, false, R"( + DECLARE(Bool, implicit_select, false, R"( Allow writing simple SELECT queries without the leading SELECT keyword, which makes it simple for calculator-style usage, e.g. `1 + 2` becomes a valid query. 
)", 0) // End of COMMON_SETTINGS -// Please add settings related to formats in FormatFactorySettingsDeclaration.h, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. +// Please add settings related to formats in Core/FormatFactorySettings.h, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ @@ -5888,6 +5878,7 @@ Allow writing simple SELECT queries without the leading SELECT keyword, which ma MAKE_OBSOLETE(M, Bool, query_plan_optimize_primary_key, true) \ MAKE_OBSOLETE(M, Bool, optimize_monotonous_functions_in_order_by, false) \ MAKE_OBSOLETE(M, UInt64, http_max_chunk_size, 100_GiB) \ + MAKE_OBSOLETE(M, Bool, enable_deflate_qpl_codec, false) \ /** The section above is for obsolete settings. Do not add anything there. */ #endif /// __CLION_IDE__ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 5092e00aece..d958d091975 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -77,13 +77,16 @@ static std::initializer_list #include #include #include +#include +#include #include #include #include #include -#include +#include #include #include -#include #include #include #include -#include namespace DB @@ -263,14 +264,6 @@ enum class DistributedDDLOutputMode : uint8_t DECLARE_SETTING_ENUM(DistributedDDLOutputMode) -enum class StreamingHandleErrorMode : uint8_t -{ - DEFAULT = 0, // Ignore errors with threshold. - STREAM, // Put errors to stream in the virtual column named ``_error. - /*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likely system.kafka_errors. This is not implemented now. */ - /*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. 
*/ -}; - DECLARE_SETTING_ENUM(StreamingHandleErrorMode) DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) diff --git a/src/Core/StreamingHandleErrorMode.h b/src/Core/StreamingHandleErrorMode.h new file mode 100644 index 00000000000..4e47c6f927f --- /dev/null +++ b/src/Core/StreamingHandleErrorMode.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ + +enum class StreamingHandleErrorMode : uint8_t +{ + DEFAULT = 0, // Ignore errors with threshold. + STREAM, // Put errors to stream in the virtual column named ``_error. + /*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likely system.kafka_errors. This is not implemented now. */ + /*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. */ +}; + +} diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index b242d871c36..f9ed734da0f 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -47,8 +47,8 @@ public: Field getDefault() const override; + DataTypePtr getNormalizedType() const override { return std::make_shared(nested->getNormalizedType()); } bool equals(const IDataType & rhs) const override; - bool isParametric() const override { return true; } bool haveSubtypes() const override { return true; } bool cannotBeStoredInTables() const override { return nested->cannotBeStoredInTables(); } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index c506591ba79..1df93dc2b8b 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -43,7 +43,10 @@ public: bool isParametric() const override { return true; } bool haveSubtypes() const override { return true; } bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } - + DataTypePtr getNormalizedType() const override + { + return std::make_shared(key_type->getNormalizedType(), value_type->getNormalizedType()); + } const DataTypePtr & getKeyType() const { return key_type; } const 
DataTypePtr & getValueType() const { return value_type; } DataTypes getKeyValueTypes() const { return {key_type, value_type}; } diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index bceb0f844c8..1267338acb9 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -133,6 +133,14 @@ std::string DataTypeTuple::doGetPrettyName(size_t indent) const return s.str(); } +DataTypePtr DataTypeTuple::getNormalizedType() const +{ + DataTypes normalized_elems; + normalized_elems.reserve(elems.size()); + for (const auto & elem : elems) + normalized_elems.emplace_back(elem->getNormalizedType()); + return std::make_shared(normalized_elems); +} static inline IColumn & extractElementColumn(IColumn & column, size_t idx) { diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index fd00fce5a17..d7c97018e2e 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -61,6 +61,7 @@ public: MutableSerializationInfoPtr createSerializationInfo(const SerializationInfoSettings & settings) const override; SerializationInfoPtr getSerializationInfo(const IColumn & column) const override; + DataTypePtr getNormalizedType() const override; const DataTypePtr & getElement(size_t i) const { return elems[i]; } const DataTypes & getElements() const { return elems; } const Strings & getElementNames() const { return names; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 2d1e1b9bc76..33eddf8e9b8 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -88,6 +88,15 @@ public: DataTypePtr getPtr() const { return shared_from_this(); } + /// Returns the normalized form of the current type, currently handling the + /// conversion of named tuples to unnamed tuples. + /// + /// This is useful for converting aggregate states into a normalized form with + /// normalized argument types. 
E.g, `AggregateFunction(uniq, Tuple(a int, b int))` + /// should be convertible to `AggregateFunction(uniq, Tuple(int, int))`, as both + /// have same memory layouts for state representation and the same serialization. + virtual DataTypePtr getNormalizedType() const { return shared_from_this(); } + /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; diff --git a/src/Databases/DatabaseReplicatedSettings.cpp b/src/Databases/DatabaseReplicatedSettings.cpp index 5ee37b55706..ae8fdbe6458 100644 --- a/src/Databases/DatabaseReplicatedSettings.cpp +++ b/src/Databases/DatabaseReplicatedSettings.cpp @@ -7,13 +7,13 @@ namespace DB { -#define LIST_OF_DATABASE_REPLICATED_SETTINGS(M, ALIAS) \ - M(Float, max_broken_tables_ratio, 1, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ - M(UInt64, max_replication_lag_to_enqueue, 50, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \ - M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ - M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ - M(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \ - M(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost recovering it from snapshot (0 means infinite)", 0) \ +#define LIST_OF_DATABASE_REPLICATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Float, max_broken_tables_ratio, 1, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ + DECLARE(UInt64, max_replication_lag_to_enqueue, 50, "Replica will throw exception on attempt to execute query if its replication lag 
greater", 0) \ + DECLARE(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ + DECLARE(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ + DECLARE(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \ + DECLARE(UInt64, max_retries_before_automatic_recovery, 100, "Max number of attempts to execute a queue entry before marking replica as lost recovering it from snapshot (0 means infinite)", 0) \ DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 067d1d4ed0a..e097ffab0d7 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -4,6 +4,7 @@ # include # include +# include # include # include @@ -18,6 +19,7 @@ # include # include # include +# include # include # include # include @@ -31,6 +33,15 @@ namespace Setting extern const SettingsUInt64 glob_expansion_max_elements; } +namespace MaterializedMySQLSetting +{ + extern const MaterializedMySQLSettingsBool allows_query_when_mysql_lost; + extern const MaterializedMySQLSettingsBool allow_startup_database_without_connection_to_mysql; + extern const MaterializedMySQLSettingsUInt64 max_bytes_in_binlog_dispatcher_buffer; + extern const MaterializedMySQLSettingsUInt64 max_flush_milliseconds_in_binlog_dispatcher; + extern const MaterializedMySQLSettingsBool use_binlog_client; +} + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -53,11 +64,13 @@ DatabaseMaterializedMySQL::DatabaseMaterializedMySQL( { } 
+DatabaseMaterializedMySQL::~DatabaseMaterializedMySQL() = default; + void DatabaseMaterializedMySQL::rethrowExceptionIfNeeded() const { std::lock_guard lock(mutex); - if (!settings->allows_query_when_mysql_lost && exception) + if (!(*settings)[MaterializedMySQLSetting::allows_query_when_mysql_lost] && exception) { try { @@ -89,7 +102,7 @@ LoadTaskPtr DatabaseMaterializedMySQL::startupDatabaseAsync(AsyncLoader & async_ [this, mode] (AsyncLoader &, const LoadJobPtr &) { LOG_TRACE(log, "Starting MaterializeMySQL database"); - if (!settings->allow_startup_database_without_connection_to_mysql + if (!(*settings)[MaterializedMySQLSetting::allow_startup_database_without_connection_to_mysql] && mode < LoadingStrictnessLevel::FORCE_ATTACH) materialize_thread.assertMySQLAvailable(); @@ -266,11 +279,11 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory) if (engine_define->settings) materialize_mode_settings->loadFromQuery(*engine_define); - if (materialize_mode_settings->use_binlog_client) + if ((*materialize_mode_settings)[MaterializedMySQLSetting::use_binlog_client]) binlog_client = DB::MySQLReplication::BinlogClientFactory::instance().getClient( configuration.host, configuration.port, configuration.username, configuration.password, - materialize_mode_settings->max_bytes_in_binlog_dispatcher_buffer, - materialize_mode_settings->max_flush_milliseconds_in_binlog_dispatcher); + (*materialize_mode_settings)[MaterializedMySQLSetting::max_bytes_in_binlog_dispatcher_buffer], + (*materialize_mode_settings)[MaterializedMySQLSetting::max_flush_milliseconds_in_binlog_dispatcher]); if (args.uuid == UUIDHelpers::Nil) { diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index a6418e6fc5c..ca9ca4369b1 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -10,13 +10,14 @@ #include #include #include -#include #include #include namespace DB { +struct 
MaterializedMySQLSettings; + /** Real-time pull table structure and data from remote MySQL * * All table structure and data will be written to the local file system @@ -35,6 +36,8 @@ public: const MySQLReplication::BinlogClientPtr & binlog_client_, std::unique_ptr settings_); + ~DatabaseMaterializedMySQL() override; + void rethrowExceptionIfNeeded() const; void setException(const std::exception_ptr & exception); diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 5d4441b3266..15a22003a1a 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -46,6 +46,11 @@ namespace Setting extern const SettingsUInt64 max_parser_depth; } +namespace MySQLSetting +{ + extern const MySQLSettingsMySQLDataTypesSupport mysql_datatypes_support_level; +} + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -329,7 +334,7 @@ DatabaseMySQL::fetchTablesColumnsList(const std::vector & tables_name, C database_name_in_mysql, tables_name, settings, - mysql_settings->mysql_datatypes_support_level); + (*mysql_settings)[MySQLSetting::mysql_datatypes_support_level]); } void DatabaseMySQL::shutdown() diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index 8e9f99e303e..17dda594d01 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -26,7 +25,7 @@ namespace DB { class Context; - +struct MySQLSettings; enum class MySQLDataTypesSupport : uint8_t; /** Real-time access to table list and table structure from remote MySQL diff --git a/src/Databases/MySQL/MaterializedMySQLSettings.cpp b/src/Databases/MySQL/MaterializedMySQLSettings.cpp index d314e1f35a9..93b0d4c9885 100644 --- a/src/Databases/MySQL/MaterializedMySQLSettings.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSettings.cpp @@ -1,7 +1,8 @@ +#include +#include #include - -#include #include +#include 
namespace DB { @@ -11,15 +12,65 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +#define LIST_OF_MATERIALIZE_MODE_SETTINGS(DECLARE, ALIAS) \ + DECLARE(UInt64, max_rows_in_buffer, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ + DECLARE(UInt64, max_bytes_in_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ + DECLARE(UInt64, max_rows_in_buffers, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ + DECLARE(UInt64, max_bytes_in_buffers, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ + DECLARE(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). when this time is exceeded, the data will be materialized", 0) \ + DECLARE(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \ + DECLARE(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \ + DECLARE(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. 
Default value: empty list — means whole tables will be replicated.", 0) \ + DECLARE(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \ + DECLARE(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \ + DECLARE(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \ + DECLARE(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \ + DECLARE(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \ + DECLARE(Bool, allow_startup_database_without_connection_to_mysql, false, "Allow to create and attach database without available connection to MySQL.", 0) \ + +DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) +struct MaterializedMySQLSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) \ + MaterializedMySQLSettings##TYPE NAME = &MaterializedMySQLSettingsImpl ::NAME; + +namespace MaterializedMySQLSetting +{ +LIST_OF_MATERIALIZE_MODE_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +MaterializedMySQLSettings::MaterializedMySQLSettings() : impl(std::make_unique()) +{ +} + +MaterializedMySQLSettings::MaterializedMySQLSettings(const MaterializedMySQLSettings & settings) + : impl(std::make_unique(*settings.impl)) +{ +} + +MaterializedMySQLSettings::MaterializedMySQLSettings(MaterializedMySQLSettings && settings) noexcept + : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +MaterializedMySQLSettings::~MaterializedMySQLSettings() = default; + 
+MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + + void MaterializedMySQLSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/src/Databases/MySQL/MaterializedMySQLSettings.h b/src/Databases/MySQL/MaterializedMySQLSettings.h index b481846afc1..01cff81b972 100644 --- a/src/Databases/MySQL/MaterializedMySQLSettings.h +++ b/src/Databases/MySQL/MaterializedMySQLSettings.h @@ -1,38 +1,39 @@ #pragma once -#include -#include +#include +#include namespace DB { class ASTStorage; +struct MaterializedMySQLSettingsImpl; -#define LIST_OF_MATERIALIZE_MODE_SETTINGS(M, ALIAS) \ - M(UInt64, max_rows_in_buffer, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ - M(UInt64, max_bytes_in_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for single table and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ - M(UInt64, max_rows_in_buffers, DEFAULT_BLOCK_SIZE, "Max rows that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ - M(UInt64, max_bytes_in_buffers, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes that data is allowed to cache in memory(for database and the cache data unable to query). when rows is exceeded, the data will be materialized", 0) \ - M(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). 
when this time is exceeded, the data will be materialized", 0) \ - M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \ - M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \ - M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \ - M(Bool, use_binlog_client, false, "Use MySQL Binlog Client.", 0) \ - M(UInt64, max_bytes_in_binlog_queue, 64 * 1024 * 1024, "Max bytes in binlog's queue created from MySQL Binlog Client.", 0) \ - M(UInt64, max_milliseconds_to_wait_in_binlog_queue, 10000, "Max milliseconds to wait when max bytes exceeded in a binlog queue.", 0) \ - M(UInt64, max_bytes_in_binlog_dispatcher_buffer, DBMS_DEFAULT_BUFFER_SIZE, "Max bytes in the binlog dispatcher's buffer before it is flushed to attached binlogs.", 0) \ - M(UInt64, max_flush_milliseconds_in_binlog_dispatcher, 1000, "Max milliseconds in the binlog dispatcher's buffer to wait before it is flushed to attached binlogs.", 0) \ - M(Bool, allow_startup_database_without_connection_to_mysql, false, "Allow to create and attach database without available connection to MySQL.", 0) \ - - DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) +/// List of available types supported in MaterializedMySQLSettings object +#define MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, String) +MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, DECLARE_SETTING_TRAIT) /** Settings for the MaterializedMySQL database engine. * Could be loaded from a CREATE DATABASE query (SETTINGS clause). 
*/ -struct MaterializedMySQLSettings : public BaseSettings +struct MaterializedMySQLSettings { + MaterializedMySQLSettings(); + MaterializedMySQLSettings(const MaterializedMySQLSettings & settings); + MaterializedMySQLSettings(MaterializedMySQLSettings && settings) noexcept; + ~MaterializedMySQLSettings(); + + MATERIALIZED_MYSQL_SETTINGS_SUPPORTED_TYPES(MaterializedMySQLSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); + +private: + std::unique_ptr impl; }; } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 0609f3eabbf..389d7a58b86 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -3,6 +3,7 @@ #if USE_MYSQL +#include #include #include #include @@ -43,6 +44,19 @@ namespace Setting extern const SettingsBool insert_allow_materialized_columns; } +namespace MaterializedMySQLSetting +{ + extern const MaterializedMySQLSettingsString materialized_mysql_tables_list; + extern const MaterializedMySQLSettingsUInt64 max_bytes_in_binlog_queue; + extern const MaterializedMySQLSettingsUInt64 max_bytes_in_buffer; + extern const MaterializedMySQLSettingsUInt64 max_bytes_in_buffers; + extern const MaterializedMySQLSettingsUInt64 max_flush_data_time; + extern const MaterializedMySQLSettingsUInt64 max_milliseconds_to_wait_in_binlog_queue; + extern const MaterializedMySQLSettingsUInt64 max_rows_in_buffer; + extern const MaterializedMySQLSettingsUInt64 max_rows_in_buffers; + extern const MaterializedMySQLSettingsInt64 max_wait_time_when_mysql_unavailable; +} + namespace ErrorCodes { extern const int SYNTAX_ERROR; @@ -270,10 +284,10 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( { query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") "; - if (!settings->materialized_mysql_tables_list.value.empty()) + if 
(!(*settings)[MaterializedMySQLSetting::materialized_mysql_tables_list].value.empty()) { Names tables_list; - boost::split(tables_list, settings->materialized_mysql_tables_list.value, [](char c){ return c == ','; }); + boost::split(tables_list, (*settings)[MaterializedMySQLSetting::materialized_mysql_tables_list].value, [](char c){ return c == ','; }); for (String & table_name: tables_list) { boost::trim(table_name); @@ -305,7 +319,7 @@ void MaterializedMySQLSyncThread::synchronization() } /// TODO: add gc task for `sign = -1`(use alter table delete, execute by interval. need final state) - UInt64 max_flush_time = settings->max_flush_data_time; + UInt64 max_flush_time = (*settings)[MaterializedMySQLSetting::max_flush_data_time]; try { @@ -324,7 +338,7 @@ void MaterializedMySQLSyncThread::synchronization() } catch (const Exception & e) { - if (settings->max_wait_time_when_mysql_unavailable < 0) + if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0) throw; bool binlog_was_purged = e.code() == ER_MASTER_FATAL_ERROR_READING_BINLOG || e.code() == ER_MASTER_HAS_PURGED_REQUIRED_GTIDS; @@ -335,12 +349,12 @@ void MaterializedMySQLSyncThread::synchronization() LOG_INFO(log, "Lost connection to MySQL"); need_reconnect = true; setSynchronizationThreadException(std::current_exception()); - sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable); + sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]); continue; } if (watch.elapsedMilliseconds() > max_flush_time || buffers.checkThresholds( - settings->max_rows_in_buffer, settings->max_bytes_in_buffer, - settings->max_rows_in_buffers, settings->max_bytes_in_buffers) + (*settings)[MaterializedMySQLSetting::max_rows_in_buffer], (*settings)[MaterializedMySQLSetting::max_bytes_in_buffer], + (*settings)[MaterializedMySQLSetting::max_rows_in_buffers], (*settings)[MaterializedMySQLSetting::max_bytes_in_buffers]) ) { watch.restart(); @@ -550,9 +564,9 
@@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta if (connection.isNull()) { - if (settings->max_wait_time_when_mysql_unavailable < 0) + if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unable to connect to MySQL"); - sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable); + sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]); continue; } @@ -595,8 +609,8 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta binlog = binlog_client->createBinlog(metadata.executed_gtid_set, database_name, {mysql_database_name}, - settings->max_bytes_in_binlog_queue, - settings->max_milliseconds_to_wait_in_binlog_queue); + (*settings)[MaterializedMySQLSetting::max_bytes_in_binlog_queue], + (*settings)[MaterializedMySQLSetting::max_milliseconds_to_wait_in_binlog_queue]); } else { @@ -611,7 +625,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta { tryLogCurrentException(log); - if (settings->max_wait_time_when_mysql_unavailable < 0) + if ((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable] < 0) throw; if (!shouldReconnectOnException(std::current_exception())) @@ -619,7 +633,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta setSynchronizationThreadException(std::current_exception()); /// Avoid busy loop when MySQL is not available. 
- sleepForMilliseconds(settings->max_wait_time_when_mysql_unavailable); + sleepForMilliseconds((*settings)[MaterializedMySQLSetting::max_wait_time_when_mysql_unavailable]); } } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index f016967fad5..53742dbef3e 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -10,7 +10,6 @@ # include # include # include -# include # include # include # include @@ -21,6 +20,7 @@ namespace DB { struct MaterializeMetadata; +struct MaterializedMySQLSettings; /** MySQL table structure and data synchronization thread * diff --git a/src/Databases/enableAllExperimentalSettings.cpp b/src/Databases/enableAllExperimentalSettings.cpp index d2a3ecfe05f..d1b3b776370 100644 --- a/src/Databases/enableAllExperimentalSettings.cpp +++ b/src/Databases/enableAllExperimentalSettings.cpp @@ -40,7 +40,6 @@ void enableAllExperimentalSettings(ContextMutablePtr context) context->setSetting("allow_suspicious_primary_key", 1); context->setSetting("allow_suspicious_ttl_expressions", 1); context->setSetting("allow_suspicious_variant_types", 1); - context->setSetting("enable_deflate_qpl_codec", 1); context->setSetting("enable_zstd_qat_codec", 1); context->setSetting("allow_create_index_without_type", 1); context->setSetting("allow_experimental_s3queue", 1); diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index ea403fc3431..e79b73e6587 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -36,6 +36,12 @@ namespace Setting extern const SettingsUInt64 glob_expansion_max_elements; } +namespace MySQLSetting +{ + extern const MySQLSettingsUInt64 connect_timeout; + extern const MySQLSettingsUInt64 read_write_timeout; +} + [[maybe_unused]] static const size_t default_num_tries_on_connection_loss = 3; @@ -82,8 +88,9 @@ void 
registerDictionarySourceMysql(DictionarySourceFactory & factory) if (named_collection) { auto allowed_arguments{dictionary_allowed_keys}; - for (const auto & setting : mysql_settings.all()) - allowed_arguments.insert(setting.getName()); + auto setting_names = mysql_settings.getAllRegisteredNames(); + for (const auto & name : setting_names) + allowed_arguments.insert(name); validateNamedCollection>(*named_collection, {}, allowed_arguments); StorageMySQL::Configuration::Addresses addresses; @@ -115,17 +122,12 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) }); const auto & settings = global_context->getSettingsRef(); - if (!mysql_settings.isChanged("connect_timeout")) - mysql_settings.connect_timeout = settings[Setting::external_storage_connect_timeout_sec]; - if (!mysql_settings.isChanged("read_write_timeout")) - mysql_settings.read_write_timeout = settings[Setting::external_storage_rw_timeout_sec]; + if (!mysql_settings[MySQLSetting::connect_timeout].changed) + mysql_settings[MySQLSetting::connect_timeout] = settings[Setting::external_storage_connect_timeout_sec]; + if (!mysql_settings[MySQLSetting::read_write_timeout].changed) + mysql_settings[MySQLSetting::read_write_timeout] = settings[Setting::external_storage_rw_timeout_sec]; - for (const auto & setting : mysql_settings.all()) - { - const auto & setting_name = setting.getName(); - if (named_collection->has(setting_name)) - mysql_settings.set(setting_name, named_collection->get(setting_name)); - } + mysql_settings.loadFromNamedCollection(*named_collection); pool = std::make_shared( createMySQLPoolWithFailover( diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index ece5734a96d..7239229d417 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 2ee985b1c31..99ea01aa4f1 100644 
--- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -87,8 +87,8 @@ void KeyMetadata::assertAccess(const UserID & user_id_) const if (!checkAccess(user_id_)) { throw Exception(ErrorCodes::FILECACHE_ACCESS_DENIED, - "Metadata for key {} belongs to user {}, but user {} requested it", - key.toString(), user.user_id, user_id_); + "Metadata for key {} belongs to another user", + key.toString()); } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index cc5d8fc255a..e88fdeb0379 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -66,6 +66,11 @@ namespace Setting extern const SettingsBool use_hedged_requests; } +namespace DistributedSetting +{ + extern const DistributedSettingsBool skip_unavailable_shards; +} + namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; @@ -155,7 +160,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster, if (!settings[Setting::skip_unavailable_shards].changed && distributed_settings) { - new_settings[Setting::skip_unavailable_shards] = distributed_settings->skip_unavailable_shards.value; + new_settings[Setting::skip_unavailable_shards] = (*distributed_settings)[DistributedSetting::skip_unavailable_shards].value; new_settings[Setting::skip_unavailable_shards].changed = true; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 85cde959b66..d0adf2102a1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1124,15 +1124,15 @@ Strings Context::getWarnings() const SharedLockGuard lock(shared->mutex); common_warnings = shared->warnings; if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast(shared->max_table_num_to_warn)) - common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn)); + 
common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}.", shared->max_table_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast(shared->max_view_num_to_warn)) - common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn)); + common_warnings.emplace_back(fmt::format("The number of attached views is more than {}.", shared->max_view_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast(shared->max_dictionary_num_to_warn)) - common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn)); + common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}.", shared->max_dictionary_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast(shared->max_database_num_to_warn)) - common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn)); + common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}.", shared->max_database_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast(shared->max_part_num_to_warn)) - common_warnings.emplace_back(fmt::format("The number of active parts is more than {}", shared->max_part_num_to_warn)); + common_warnings.emplace_back(fmt::format("The number of active parts is more than {}.", shared->max_part_num_to_warn)); } /// Make setting's name ordered auto obsolete_settings = settings->getChangedAndObsoleteNames(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 48f7b38f6c3..c92602105c5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,7 @@ 
TemporaryTableHolder::TemporaryTableHolder( context_, [&](const StorageID & table_id) { - auto storage = std::make_shared(table_id, ColumnsDescription{columns}, ConstraintsDescription{constraints}, String{}); + auto storage = std::make_shared(table_id, ColumnsDescription{columns}, ConstraintsDescription{constraints}, String{}, MemorySettings{}); if (create_for_global_subquery) storage->delayReadForGlobalSubqueries(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6057afefd02..22bba01a60f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -129,7 +129,6 @@ namespace Setting extern const SettingsDefaultTableEngine default_temporary_table_engine; extern const SettingsString default_view_definer; extern const SettingsUInt64 distributed_ddl_entry_format_version; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool flatten_nested; extern const SettingsBool fsync_metadata; @@ -667,7 +666,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( bool skip_checks = LoadingStrictnessLevel::SECONDARY_CREATE <= mode; bool sanity_check_compression_codecs = !skip_checks && !context_->getSettingsRef()[Setting::allow_suspicious_codecs]; bool allow_experimental_codecs = skip_checks || context_->getSettingsRef()[Setting::allow_experimental_codecs]; - bool enable_deflate_qpl_codec = skip_checks || context_->getSettingsRef()[Setting::enable_deflate_qpl_codec]; bool enable_zstd_qat_codec = skip_checks || context_->getSettingsRef()[Setting::enable_zstd_qat_codec]; ColumnsDescription res; @@ -729,7 +727,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "ALIAS") throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = 
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec); + col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_zstd_qat_codec); } if (col_decl.statistics_desc) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d8c35285210..3918c1c37ea 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1708,7 +1708,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

#include + namespace DB { @@ -45,9 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - { - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); - } + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -55,10 +54,20 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (!ast) return; + /// First apply the outermost settings. Then they could be overridden by deeper settings. + if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); + + if (const auto * create_query = ast->as(); create_query && create_query->select) + applySettingsFromSelectWithUnion(create_query->select->as(), context_); + } + if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } else if (const auto * select_with_union_query = ast->as()) { @@ -67,28 +76,15 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); 
applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } - else if (const auto * query_with_output = dynamic_cast(ast.get())) - { - if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); - - if (const auto * create_query = ast->as()) - { - if (create_query->select) - { - applySettingsFromSelectWithUnion(create_query->select->as(), context_); - } - } - } else if (auto * insert_query = ast->as()) { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } } diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index 2438762f347..f50105c39f4 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). * It is used for interpretation of SETTINGS clause in SELECT query. 
*/ - void executeForCurrentContext(bool ignore_setting_constraints = false); + void executeForCurrentContext(bool ignore_setting_constraints); bool supportsTransactions() const override { return true; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 49c817586fa..ce108ea8622 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -12,9 +11,7 @@ #include #include #include -#include #include -#include #include #include @@ -68,6 +65,8 @@ ColumnsDescription PartLogElement::getColumnsDescription() {"RemovePart", static_cast(REMOVE_PART)}, {"MutatePart", static_cast(MUTATE_PART)}, {"MovePart", static_cast(MOVE_PART)}, + {"MergePartsStart", static_cast(MERGE_PARTS_START)}, + {"MutatePartStart", static_cast(MUTATE_PART_START)}, } ); @@ -102,10 +101,12 @@ ColumnsDescription PartLogElement::getColumnsDescription() "Type of the event that occurred with the data part. " "Can have one of the following values: " "NewPart — Inserting of a new data part, " - "MergeParts — Merging of data parts, " + "MergePartsStart — Merging of data parts has started, " + "MergeParts — Merging of data parts has finished, " "DownloadPart — Downloading a data part, " - "RemovePart — Removing or detaching a data part using DETACH PARTITION, " - "MutatePart — Mutating of a data part, " + "RemovePart — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition)." + "MutatePartStart — Mutating of a data part has started, " + "MutatePart — Mutating of a data part has finished, " "MovePart — Moving the data part from the one disk to another one."}, {"merge_reason", std::move(merge_reason_datatype), "The reason for the event with type MERGE_PARTS. 
Can have one of the following values: " diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 6dc3116ad48..92ad50f139d 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -26,6 +26,8 @@ struct PartLogElement REMOVE_PART = 4, MUTATE_PART = 5, MOVE_PART = 6, + MERGE_PARTS_START = 7, + MUTATE_PART_START = 8, }; /// Copy of MergeAlgorithm since values are written to disk. @@ -135,7 +137,7 @@ public: static PartLogEntries createPartLogEntries(const MutableDataPartsVector & parts, UInt64 elapsed_ns, ProfileCountersSnapshotPtr profile_counters = {}); - /// Add a record about creation of new part. + /// Add a record about creation of a new part. static bool addNewPart(ContextPtr context, const PartLogEntry & part, const ExecutionStatus & execution_status = {}); diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 35b96bce42a..b6510b7573f 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -1,12 +1,14 @@ #include "Loggers.h" -#include "OwnFormattingChannel.h" -#include "OwnPatternFormatter.h" -#include "OwnSplitChannel.h" +#include +#include +#include +#include #include #include +#include #include #include #include @@ -222,6 +224,18 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log logger.close(); logger.setChannel(split); + + const std::string global_pos_pattern = config.getRawString("logger.message_regexp", ""); + const std::string global_neg_pattern = config.getRawString("logger.message_regexp_negative", ""); + + Poco::AutoPtr pf; + if (config.getString("logger.formatting.type", "") == "json") + pf = new OwnJSONPatternFormatter(config); + else + pf = new OwnPatternFormatter; + + DB::createOrUpdateFilterChannel(logger, global_pos_pattern, global_neg_pattern, pf, Poco::Logger::ROOT); + logger.setLevel(max_log_level); // Global logging level and channel (it can be overridden for specific loggers). 
@@ -236,6 +250,8 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log { logger.get(name).setLevel(max_log_level); logger.get(name).setChannel(split); + + DB::createOrUpdateFilterChannel(logger.get(name), global_pos_pattern, global_neg_pattern, pf, name); } // Explicitly specified log levels for specific loggers. @@ -262,6 +278,26 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } } + // Explicitly specified regexp patterns for filtering specific loggers + { + Poco::Util::AbstractConfiguration::Keys loggers_regexp; + config.keys("logger.message_regexps", loggers_regexp); + + if (!loggers_regexp.empty()) + { + for (const auto & key : loggers_regexp) + { + if (key == "logger" || key.starts_with("logger[")) + { + const std::string name = config.getString("logger.message_regexps." + key + ".name"); + const std::string pos_pattern = config.getRawString("logger.message_regexps." + key + ".message_regexp", global_pos_pattern); + const std::string neg_pattern = config.getRawString("logger.message_regexps." 
+ key + ".message_regexp_negative", global_neg_pattern); + + DB::createOrUpdateFilterChannel(logger.root().get(name), pos_pattern, neg_pattern, pf, name); + } + } + } + } #ifndef WITHOUT_TEXT_LOG if (allowTextLog() && config.has("text_log")) { @@ -347,16 +383,32 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log } split->setLevel("syslog", syslog_level); + const std::string global_pos_pattern = config.getRawString("logger.message_regexp", ""); + const std::string global_neg_pattern = config.getRawString("logger.message_regexp_negative", ""); + + Poco::AutoPtr pf; + if (config.getString("logger.formatting.type", "") == "json") + pf = new OwnJSONPatternFormatter(config); + else + pf = new OwnPatternFormatter; + + DB::createOrUpdateFilterChannel(logger, global_pos_pattern, global_neg_pattern, pf, Poco::Logger::ROOT); + // Global logging level (it can be overridden for specific loggers). logger.setLevel(max_log_level); // Set level to all already created loggers std::vector names; - logger.root().names(names); + + // Set all to global in case logger.levels are not specified for (const auto & name : names) + { logger.root().get(name).setLevel(max_log_level); + DB::createOrUpdateFilterChannel(logger.root().get(name), global_pos_pattern, global_neg_pattern, pf, name); + } + logger.root().setLevel(max_log_level); // Explicitly specified log levels for specific loggers. @@ -383,6 +435,27 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log } } } + + // Explicitly specified regexp patterns for filtering specific loggers + { + Poco::Util::AbstractConfiguration::Keys loggers_regexp; + config.keys("logger.message_regexps", loggers_regexp); + + if (!loggers_regexp.empty()) + { + for (const auto & key : loggers_regexp) + { + if (key == "logger" || key.starts_with("logger[")) + { + const std::string name(config.getString("logger.message_regexps." 
+ key + ".name")); + const std::string pos_pattern(config.getRawString("logger.message_regexps." + key + ".message_regexp", global_pos_pattern)); + const std::string neg_pattern(config.getRawString("logger.message_regexps." + key + ".message_regexp_negative", global_neg_pattern)); + + DB::createOrUpdateFilterChannel(logger.root().get(name), pos_pattern, neg_pattern, pf, name); + } + } + } + } } /// NOLINTEND(readability-static-accessed-through-instance) diff --git a/src/Loggers/OwnFilteringChannel.cpp b/src/Loggers/OwnFilteringChannel.cpp new file mode 100644 index 00000000000..36193c46314 --- /dev/null +++ b/src/Loggers/OwnFilteringChannel.cpp @@ -0,0 +1,96 @@ +#include +#include +#include + + +namespace DB +{ + +void OwnFilteringChannel::log(const Poco::Message & msg) +{ + if (regexpFilteredOut(msg)) + return; + + pChannel->log(msg); +} + +bool OwnFilteringChannel::regexpFilteredOut(const Poco::Message & msg) +{ + std::string formatted_text; + auto [pos_pattern, neg_pattern] = safeGetPatterns(); + + // Skip checks if both patterns are empty + if (!pos_pattern.empty() || !neg_pattern.empty()) + { + // Apply formatting to the text + if (pFormatter) + { + pFormatter->formatExtended(ExtendedLogMessage::getFrom(msg), formatted_text); + } + else + { + formatted_text = msg.getText(); + } + + // Check for patterns in formatted text + Poco::RegularExpression positive_regexp(pos_pattern); + if (!pos_pattern.empty() && !positive_regexp.match(formatted_text)) + { + return true; + } + + Poco::RegularExpression negative_regexp(neg_pattern); + if (!neg_pattern.empty() && negative_regexp.match(formatted_text)) + { + return true; + } + } + + return false; +} + +void OwnFilteringChannel::setRegexpPatterns(const std::string & new_pos_pattern, const std::string & new_neg_pattern) +{ + auto [old_pos_pattern, old_neg_pattern] = safeGetPatterns(); + if (old_pos_pattern != new_pos_pattern || old_neg_pattern != new_neg_pattern) + { + std::unique_lock write_lock(pattern_mutex); + 
positive_pattern = new_pos_pattern; + negative_pattern = new_neg_pattern; + } +} + +std::pair OwnFilteringChannel::safeGetPatterns() +{ + std::shared_lock read_lock(pattern_mutex); + return std::make_pair(positive_pattern, negative_pattern); +} + +void createOrUpdateFilterChannel(Poco::Logger & logger, const std::string & pos_pattern, const std::string & neg_pattern, Poco::AutoPtr pf, const std::string & name) +{ + Poco::AutoPtr src_channel(logger.getChannel(), true /*shared*/); + Poco::AutoPtr filter_channel(dynamic_cast(src_channel.get()), true); + + // If this logger doesn't have it's own unique filter channel + if (!filter_channel) + { + // Skip if regexp feature has never been used yet + if (pos_pattern.empty() && neg_pattern.empty()) + return; + + Poco::AutoPtr new_filter_channel = new DB::OwnFilteringChannel(src_channel, pf, pos_pattern, neg_pattern, name); + logger.setChannel(new_filter_channel); + } + // If logger has filter channel, but not it's own unique one (e.g copied from another by default), create copy + else if (filter_channel->getAssignedLoggerName() != name) + { + Poco::AutoPtr new_filter_channel = new DB::OwnFilteringChannel(filter_channel, pos_pattern, neg_pattern, name); + logger.setChannel(new_filter_channel); + } + else + { + filter_channel->setRegexpPatterns(pos_pattern, neg_pattern); + } +} + +} diff --git a/src/Loggers/OwnFilteringChannel.h b/src/Loggers/OwnFilteringChannel.h new file mode 100644 index 00000000000..5dce6007baf --- /dev/null +++ b/src/Loggers/OwnFilteringChannel.h @@ -0,0 +1,84 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +// Filters the logs based on regular expressions. 
Should be processed after formatting channel to read entire formatted text +class OwnFilteringChannel : public Poco::Channel +{ +public: + explicit OwnFilteringChannel(Poco::AutoPtr pChannel_, Poco::AutoPtr pf, + const std::string & positive_pattern_, const std::string & negative_pattern_, const std::string & name_) + : logger_name(name_), positive_pattern(positive_pattern_), negative_pattern(negative_pattern_), pChannel(pChannel_), pFormatter(pf) + { + } + + explicit OwnFilteringChannel(Poco::AutoPtr other, const std::string & positive_pattern_, const std::string & negative_pattern_, const std::string & name_) + : logger_name(name_), positive_pattern(positive_pattern_), negative_pattern(negative_pattern_), pChannel(other->pChannel), pFormatter(other->pFormatter) + { + } + + // Only log if pass both positive and negative regexp checks. + // Checks the regexps on the formatted text (without color), but then passes the raw text + // to the split channel to handle formatting for individual channels (e.g apply color) + void log(const Poco::Message & msg) override; + + // Sets the regex patterns to use for filtering. 
Specifying an empty string pattern "" indicates no filtering + void setRegexpPatterns(const std::string & new_pos_pattern, const std::string & new_neg_pattern); + + std::string getAssignedLoggerName() const + { + return logger_name; + } + + void open() override + { + if (pChannel) + pChannel->open(); + } + + void close() override + { + if (pChannel) + pChannel->close(); + } + + void setProperty(const std::string & name, const std::string & value) override + { + if (pChannel) + pChannel->setProperty(name, value); + } + + std::string getProperty(const std::string & name) const override + { + if (pChannel) + return pChannel->getProperty(name); + return ""; + } + +private: + bool regexpFilteredOut(const Poco::Message & msg); + + // Create copy safely, so we don't have to worry about race conditions from reading and writing at the same time + std::pair safeGetPatterns(); + + const std::string logger_name; + std::string positive_pattern; + std::string negative_pattern; + Poco::AutoPtr pChannel; + Poco::AutoPtr pFormatter; + std::shared_mutex pattern_mutex; +}; + +// Creates filter channel only if needed or updates if it already exists +void createOrUpdateFilterChannel(Poco::Logger & logger, const std::string & pos_pattern, const std::string & neg_pattern, Poco::AutoPtr pf, const std::string & name = ""); + +} diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 2058c7c60cf..2b581f20e3b 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -177,7 +177,6 @@ String IAST::formatWithPossiblyHidingSensitiveData( IdentifierQuotingRule identifier_quoting_rule, IdentifierQuotingStyle identifier_quoting_style) const { - WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; @@ -287,7 +286,8 @@ void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const writeChar('\n', ostr); for (const auto & child : children) { - if (!child) throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, "Can't dump nullptr child"); + 
if (!child) + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, "Can't dump a nullptr child"); child->dumpTree(ostr, indent + 1); } } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index cb0c10cd1c9..ac8f7d560e0 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -152,37 +151,55 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } + /// These two sections are allowed in an arbitrary order. ParserKeyword s_format(Keyword::FORMAT); - - if (s_format.ignore(pos, expected)) - { - ParserIdentifier format_p; - - if (!format_p.parse(pos, query_with_output.format, expected)) - return false; - setIdentifierSpecial(query_with_output.format); - - query_with_output.children.push_back(query_with_output.format); - } - - // SETTINGS key1 = value1, key2 = value2, ... ParserKeyword s_settings(Keyword::SETTINGS); - if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) - return false; - query_with_output.children.push_back(query_with_output.settings_ast); - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) + /** Why: let's take the following example: + * SELECT 1 UNION ALL SELECT 2 FORMAT TSV + * Each subquery can be put in parentheses and have its own settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV + * And the whole query can have settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f + * A single query with output is parsed in the same way as the UNION ALL chain: + * SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f + * So while these forms have a 
slightly different meaning, they both exist: + * SELECT 1 SETTINGS a=b FORMAT TSV + * SELECT 1 FORMAT TSV SETTINGS e=f + * And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. + * But while this work: + * (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f + * This does not work automatically, unless we explicitly allow different orders: + * (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV + * Inevitably, we also allow this: + * SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV + * ^^^^^^^^^^^^^^^^^^^^^ + * Because this part is consumed into ASTSelectWithUnionQuery + * and the rest into ASTQueryWithOutput. + */ + + for (size_t i = 0; i < 2; ++i) + { + if (!query_with_output.format && s_format.ignore(pos, expected)) { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); + ParserIdentifier format_p; + + if (!format_p.parse(pos, query_with_output.format, expected)) + return false; + setIdentifierSpecial(query_with_output.format); + + query_with_output.children.push_back(query_with_output.format); } + else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) + { + // SETTINGS key1 = value1, key2 = value2, ... 
+ ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) + return false; + query_with_output.children.push_back(query_with_output.settings_ast); + } + else + break; } node = std::move(query); diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp deleted file mode 100644 index 8cf0d0063ae..00000000000 --- a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) -{ - if (node->as()) - return true; - if (node->as()) - return true; - if (child->as()) - return true; - return false; -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data) -{ - if (auto * select_query = ast->as()) - visit(*select_query, ast, data); -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) -{ - ASTPtr select_settings_ast = select_query.settings(); - if (!select_settings_ast) - { - select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone()); - return; - } - - SettingsChanges & select_settings = select_settings_ast->as().changes; - SettingsChanges & settings = data.settings_ast->as().changes; - - for (auto & setting : settings) - { - auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting) - { - return select_setting.name == setting.name; - }); - if (it == select_settings.end()) - select_settings.push_back(setting); - else - it->value = setting.value; - } -} - -} diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h deleted file mode 100644 index fde8a07b555..00000000000 --- 
a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTSelectQuery; -struct SettingChange; -class SettingsChanges; - -/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query: -/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput) -/// -/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 -> -/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1 -/// -/// Otherwise settings after FORMAT will not be applied. -class QueryWithOutputSettingsPushDownMatcher -{ -public: - using Visitor = InDepthNodeVisitor; - - struct Data - { - const ASTPtr & settings_ast; - }; - - static bool needChildVisit(ASTPtr & node, const ASTPtr & child); - static void visit(ASTPtr & ast, Data & data); - -private: - static void visit(ASTSelectQuery &, ASTPtr &, Data &); -}; - -using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor; - -} diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2947af6cdb3..8d3c75fdabb 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -858,9 +858,8 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, query_plan.addStep(std::move(filling_step)); } -void addLimitByStep(QueryPlan & query_plan, - const LimitByAnalysisResult & limit_by_analysis_result, - const QueryNode & query_node) +void addLimitByStep( + QueryPlan & query_plan, const LimitByAnalysisResult & limit_by_analysis_result, const QueryNode & query_node, bool do_not_skip_offset) { /// Constness of LIMIT BY limit is validated during query analysis stage UInt64 limit_by_limit = query_node.getLimitByLimit()->as().getValue().safeGet(); @@ -872,6 +871,15 @@ void addLimitByStep(QueryPlan & query_plan, limit_by_offset = query_node.getLimitByOffset()->as().getValue().safeGet(); } + if (do_not_skip_offset) + { + if (limit_by_limit > 
std::numeric_limits::max() - limit_by_offset) + return; + + limit_by_limit += limit_by_offset; + limit_by_offset = 0; + } + auto limit_by_step = std::make_unique(query_plan.getCurrentHeader(), limit_by_limit, limit_by_offset, @@ -985,10 +993,14 @@ void addPreliminarySortOrDistinctOrLimitStepsIfNeeded(QueryPlan & query_plan, { auto & limit_by_analysis_result = expressions_analysis_result.getLimitBy(); addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); - addLimitByStep(query_plan, limit_by_analysis_result, query_node); + /// We don't apply LIMIT BY on remote nodes at all in the old infrastructure. + /// https://github.com/ClickHouse/ClickHouse/blob/67c1e89d90ef576e62f8b1c68269742a3c6f9b1e/src/Interpreters/InterpreterSelectQuery.cpp#L1697-L1705 + /// Let's be optimistic and apply LIMIT BY here, only without skipping the offset (it will be skipped on the initiator). + addLimitByStep(query_plan, limit_by_analysis_result, query_node, true /*do_not_skip_offset*/); } - if (query_node.hasLimit()) + /// WITH TIES is simply not supported properly for preliminary steps, so let's disable it. 
+ if (query_node.hasLimit() && !query_node.hasLimitByOffset() && !query_node.isLimitWithTies()) addPreliminaryLimitStep(query_plan, query_analysis_result, planner_context, true /*do_not_skip_offset*/); } @@ -1777,21 +1789,20 @@ void Planner::buildPlanForQueryNode() { auto & limit_by_analysis_result = expression_analysis_result.getLimitBy(); addExpressionStep(query_plan, limit_by_analysis_result.before_limit_by_actions, "Before LIMIT BY", useful_sets); - addLimitByStep(query_plan, limit_by_analysis_result, query_node); + addLimitByStep(query_plan, limit_by_analysis_result, query_node, false /*do_not_skip_offset*/); } if (query_node.hasOrderBy()) addWithFillStepIfNeeded(query_plan, query_analysis_result, planner_context, query_node); - bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; - - if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_offset) + const bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; + const bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; + if (query_node.hasLimit() && query_node.isLimitWithTies() && apply_limit && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); addExtremesStepIfNeeded(query_plan, planner_context); bool limit_applied = applied_prelimit || (query_node.isLimitWithTies() && apply_offset); - bool apply_limit = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregation; /** Limit is no longer needed if there is prelimit. 
* diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8fe2d874ca5..3186df6a6b3 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -175,6 +175,7 @@ namespace Setting extern const SettingsBool use_skip_indexes; extern const SettingsBool use_skip_indexes_if_final; extern const SettingsBool use_uncompressed_cache; + extern const SettingsUInt64 merge_tree_min_read_task_size; } namespace MergeTreeSetting @@ -446,20 +447,17 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit reader_settings, required_columns, pool_settings, + block_size, context); - auto block_size_copy = block_size; - block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read; - Pipes pipes; for (size_t i = 0; i < pool_settings.threads; ++i) { auto algorithm = std::make_unique(i); - auto processor = std::make_unique( - pool, std::move(algorithm), prewhere_info, - actions_settings, block_size_copy, reader_settings); + auto processor + = std::make_unique(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings); auto source = std::make_shared(std::move(processor), data.getLogName()); pipes.emplace_back(std::move(source)); @@ -526,6 +524,7 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, pool_settings, + block_size, context); } else @@ -540,25 +539,19 @@ Pipe ReadFromMergeTree::readFromPool( reader_settings, required_columns, pool_settings, + block_size, context); } LOG_DEBUG(log, "Reading approx. {} rows with {} streams", total_rows, pool_settings.threads); - /// The reason why we change this setting is because MergeTreeReadPool takes the full task - /// ignoring min_marks_to_read setting in case of remote disk (see MergeTreeReadPool::getTask). - /// In this case, we won't limit the number of rows to read based on adaptive granularity settings. 
- auto block_size_copy = block_size; - block_size_copy.min_marks_to_read = pool_settings.min_marks_for_concurrent_read; - Pipes pipes; for (size_t i = 0; i < pool_settings.threads; ++i) { auto algorithm = std::make_unique(i); - auto processor = std::make_unique( - pool, std::move(algorithm), prewhere_info, - actions_settings, block_size_copy, reader_settings); + auto processor + = std::make_unique(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings); auto source = std::make_shared(std::move(processor), data.getLogName()); @@ -627,6 +620,7 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, pool_settings, + block_size, context); } else @@ -643,6 +637,7 @@ Pipe ReadFromMergeTree::readInOrder( reader_settings, required_columns, pool_settings, + block_size, context); } @@ -676,9 +671,8 @@ Pipe ReadFromMergeTree::readInOrder( else algorithm = std::make_unique(i); - auto processor = std::make_unique( - pool, std::move(algorithm), prewhere_info, - actions_settings, block_size, reader_settings); + auto processor + = std::make_unique(pool, std::move(algorithm), prewhere_info, actions_settings, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); @@ -798,7 +792,7 @@ struct PartRangesReadInfo min_marks_for_concurrent_read = MergeTreeDataSelectExecutor::minMarksForConcurrentRead( min_rows_for_concurrent_read, min_bytes_for_concurrent_read, - data_settings[MergeTreeSetting::index_granularity], index_granularity_bytes, sum_marks); + data_settings[MergeTreeSetting::index_granularity], index_granularity_bytes, settings[Setting::merge_tree_min_read_task_size], sum_marks); use_uncompressed_cache = settings[Setting::use_uncompressed_cache]; if (sum_marks > max_marks_to_use_cache) diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index a99f0a50a4b..2d5ddd859fe 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -153,6 +153,12 @@ static inline auto 
createHandlersFactoryFromConfig( handler->addFiltersFromConfig(config, prefix + "." + key); main_handler_factory->addHandler(std::move(handler)); } + else if (handler_type == "merges") + { + auto handler = std::make_shared>(server); + handler->addFiltersFromConfig(config, prefix + "." + key); + main_handler_factory->addHandler(std::move(handler)); + } else throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown handler type '{}' in config here: {}.{}.handler.type", handler_type, prefix, key); @@ -247,6 +253,12 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS factory.addPathToHints("/binary"); factory.addHandler(binary_handler); + auto merges_handler = std::make_shared>(server); + merges_handler->attachNonStrictPath("/merges"); + merges_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/merges"); + factory.addHandler(merges_handler); + auto js_handler = std::make_shared>(server); js_handler->attachNonStrictPath("/js/"); js_handler->allowGetAndHeadRequest(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f18c9f1cb95..921c53b6bcb 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -78,7 +78,6 @@ namespace Setting extern const SettingsUInt64 async_insert_max_data_size; extern const SettingsBool calculate_text_stack_trace; extern const SettingsBool deduplicate_blocks_in_dependent_materialized_views; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsUInt64 idle_connection_timeout; extern const SettingsBool input_format_defaults_for_omitted_fields; @@ -2238,7 +2237,6 @@ void TCPHandler::initBlockOutput(const Block & block) level, !query_settings[Setting::allow_suspicious_codecs], query_settings[Setting::allow_experimental_codecs], - query_settings[Setting::enable_deflate_qpl_codec], query_settings[Setting::enable_zstd_qat_codec]); state.maybe_compressed_out = std::make_shared( diff --git 
a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 0f5a2775e5b..a217ec0ec35 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -21,6 +21,7 @@ INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); INCBIN(resource_lz_string_js, SOURCE_DIR "/programs/server/js/lz-string.js"); INCBIN(resource_binary_html, SOURCE_DIR "/programs/server/binary.html"); +INCBIN(resource_merges_html, SOURCE_DIR "/programs/server/merges.html"); namespace DB @@ -29,6 +30,7 @@ namespace DB PlayWebUIRequestHandler::PlayWebUIRequestHandler(IServer & server_) : server(server_) {} DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) : server(server_) {} BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {} +MergesWebUIRequestHandler::MergesWebUIRequestHandler(IServer & server_) : server(server_) {} JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {} static void handle(HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) @@ -70,6 +72,11 @@ void BinaryWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPS handle(request, response, {reinterpret_cast(gresource_binary_htmlData), gresource_binary_htmlSize}); } +void MergesWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) +{ + handle(request, response, {reinterpret_cast(gresource_merges_htmlData), gresource_merges_htmlSize}); +} + void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { if (request.getURI() == "/js/uplot.js") diff --git a/src/Server/WebUIRequestHandler.h b/src/Server/WebUIRequestHandler.h index b84c8f6534d..70e4db6c5df 100644 --- a/src/Server/WebUIRequestHandler.h 
+++ b/src/Server/WebUIRequestHandler.h @@ -37,6 +37,15 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; +class MergesWebUIRequestHandler : public HTTPRequestHandler +{ +private: + IServer & server; +public: + explicit MergesWebUIRequestHandler(IServer & server_); + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; +}; + class JavaScriptWebUIRequestHandler : public HTTPRequestHandler { private: diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7c328526ab7..ab4403b3a94 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -43,6 +41,7 @@ #include + namespace DB { namespace Setting @@ -51,7 +50,6 @@ namespace Setting extern const SettingsBool allow_experimental_codecs; extern const SettingsBool allow_suspicious_codecs; extern const SettingsBool allow_suspicious_ttl_expressions; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool flatten_nested; } @@ -497,7 +495,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) column.comment = *comment; if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true); column.ttl = ttl; @@ -566,7 +564,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) else { if (codec) - column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? 
data_type : column.type, false, true, true, true); + column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true); if (comment) column.comment = *comment; @@ -1381,7 +1379,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const command.data_type, !settings[Setting::allow_suspicious_codecs], settings[Setting::allow_experimental_codecs], - settings[Setting::enable_deflate_qpl_codec], settings[Setting::enable_zstd_qat_codec]); } @@ -1412,7 +1409,6 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const command.data_type, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], - context->getSettingsRef()[Setting::enable_deflate_qpl_codec], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); } auto column_default = all_columns.getDefault(column_name); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3922f1cfcfb..b96c620592d 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -215,7 +215,7 @@ void ColumnDescription::readText(ReadBuffer & buf) comment = col_ast->comment->as().value.safeGet(); if (col_ast->codec) - codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true, true); + codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true); if (col_ast->ttl) ttl = col_ast->ttl; diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index e55eb01ae74..7270e02f506 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ 
-21,6 +22,11 @@ namespace Setting extern const SettingsBool distributed_insert_skip_read_only_replicas; } +namespace DistributedSetting +{ + extern const DistributedSettingsBool fsync_after_insert; +} + namespace ErrorCodes { extern const int MEMORY_LIMIT_EXCEEDED; @@ -53,7 +59,7 @@ bool isSplittableErrorCode(int code, bool remote) DistributedAsyncInsertBatch::DistributedAsyncInsertBatch(DistributedAsyncInsertDirectoryQueue & parent_) : parent(parent_) , split_batch_on_failure(parent.split_batch_on_failure) - , fsync(parent.storage.getDistributedSettingsRef().fsync_after_insert) + , fsync(parent.storage.getDistributedSettingsRef()[DistributedSetting::fsync_after_insert]) , dir_fsync(parent.dir_fsync) {} diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 53d62b7dd23..377919366b5 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -23,10 +24,6 @@ #include #include #include -#include -#include -#include -#include #include @@ -58,6 +55,15 @@ namespace Setting extern const SettingsUInt64 min_insert_block_size_rows; } +namespace DistributedSetting +{ + extern const DistributedSettingsUInt64 background_insert_batch; + extern const DistributedSettingsMilliseconds background_insert_max_sleep_time_ms; + extern const DistributedSettingsMilliseconds background_insert_sleep_time_ms; + extern const DistributedSettingsUInt64 background_insert_split_batch_on_failure; + extern const DistributedSettingsBool fsync_directories; +} + namespace ErrorCodes { extern const int INCORRECT_FILE_NAME; @@ -120,16 +126,16 @@ DistributedAsyncInsertDirectoryQueue::DistributedAsyncInsertDirectoryQueue( , path(fs::path(disk->getPath()) / relative_path / "") , broken_relative_path(fs::path(relative_path) / "broken") , 
broken_path(fs::path(path) / "broken" / "") - , should_batch_inserts(storage.getDistributedSettingsRef().background_insert_batch) - , split_batch_on_failure(storage.getDistributedSettingsRef().background_insert_split_batch_on_failure) - , dir_fsync(storage.getDistributedSettingsRef().fsync_directories) + , should_batch_inserts(storage.getDistributedSettingsRef()[DistributedSetting::background_insert_batch]) + , split_batch_on_failure(storage.getDistributedSettingsRef()[DistributedSetting::background_insert_split_batch_on_failure]) + , dir_fsync(storage.getDistributedSettingsRef()[DistributedSetting::fsync_directories]) , min_batched_block_size_rows(storage.getContext()->getSettingsRef()[Setting::min_insert_block_size_rows]) , min_batched_block_size_bytes(storage.getContext()->getSettingsRef()[Setting::min_insert_block_size_bytes]) , current_batch_file_path(path + "current_batch.txt") , pending_files(std::numeric_limits::max()) - , default_sleep_time(storage.getDistributedSettingsRef().background_insert_sleep_time_ms.totalMilliseconds()) + , default_sleep_time(storage.getDistributedSettingsRef()[DistributedSetting::background_insert_sleep_time_ms].totalMilliseconds()) , sleep_time(default_sleep_time) - , max_sleep_time(storage.getDistributedSettingsRef().background_insert_max_sleep_time_ms.totalMilliseconds()) + , max_sleep_time(storage.getDistributedSettingsRef()[DistributedSetting::background_insert_max_sleep_time_ms].totalMilliseconds()) , log(getLogger(getLoggerName())) , monitor_blocker(monitor_blocker_) , metric_pending_bytes(CurrentMetrics::DistributedBytesToInsert, 0) diff --git a/src/Storages/Distributed/DistributedSettings.cpp b/src/Storages/Distributed/DistributedSettings.cpp index 1f6aa6c72fa..5112c1e7011 100644 --- a/src/Storages/Distributed/DistributedSettings.cpp +++ b/src/Storages/Distributed/DistributedSettings.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,7 +8,6 @@ #include - namespace DB { @@ -16,8 +16,56 @@ namespace 
ErrorCodes extern const int UNKNOWN_SETTING; } +#define LIST_OF_DISTRIBUTED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, fsync_after_insert, false, "Do fsync for every inserted. Will decreases performance of inserts (only for background INSERT, i.e. distributed_foreground_insert=false)", 0) \ + DECLARE(Bool, fsync_directories, false, "Do fsync for temporary directory (that is used for background INSERT only) after all part operations (writes, renames, etc.).", 0) \ + /** This is the distributed version of the skip_unavailable_shards setting available in src/Core/Settings.cpp */ \ + DECLARE(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ + /** Inserts settings. */ \ + DECLARE(UInt64, bytes_to_throw_insert, 0, "If more than this number of compressed bytes will be pending for background INSERT, an exception will be thrown. 0 - do not throw.", 0) \ + DECLARE(UInt64, bytes_to_delay_insert, 0, "If more than this number of compressed bytes will be pending for background INSERT, the query will be delayed. 
0 - do not delay.", 0) \ + DECLARE(UInt64, max_delay_to_insert, 60, "Max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for background send.", 0) \ + /** Async INSERT settings */ \ + DECLARE(UInt64, background_insert_batch, 0, "Default - distributed_background_insert_batch", 0) ALIAS(monitor_batch_inserts) \ + DECLARE(UInt64, background_insert_split_batch_on_failure, 0, "Default - distributed_background_insert_split_batch_on_failure", 0) ALIAS(monitor_split_batch_on_failure) \ + DECLARE(Milliseconds, background_insert_sleep_time_ms, 0, "Default - distributed_background_insert_sleep_time_ms", 0) ALIAS(monitor_sleep_time_ms) \ + DECLARE(Milliseconds, background_insert_max_sleep_time_ms, 0, "Default - distributed_background_insert_max_sleep_time_ms", 0) ALIAS(monitor_max_sleep_time_ms) \ + DECLARE(Bool, flush_on_detach, true, "Flush data to remote nodes on DETACH/DROP/server shutdown", 0) \ + +DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) +struct DistributedSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) DistributedSettings##TYPE NAME = &DistributedSettingsImpl ::NAME; + +namespace DistributedSetting +{ +LIST_OF_DISTRIBUTED_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +DistributedSettings::DistributedSettings() : impl(std::make_unique()) +{ +} + +DistributedSettings::DistributedSettings(const DistributedSettings & settings) + : impl(std::make_unique(*settings.impl)) +{ +} + +DistributedSettings::DistributedSettings(DistributedSettings && settings) noexcept + : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +DistributedSettings::~DistributedSettings() = default; + +DISTRIBUTED_SETTINGS_SUPPORTED_TYPES(DistributedSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void 
DistributedSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) { if (!config.has(config_elem)) @@ -29,7 +77,7 @@ void DistributedSettings::loadFromConfig(const String & config_elem, const Poco: try { for (const String & key : config_keys) - set(key, config.getString(config_elem + "." + key)); + impl->set(key, config.getString(config_elem + "." + key)); } catch (Exception & e) { @@ -45,7 +93,7 @@ void DistributedSettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index c6ad9ab6fa4..cc183eab96f 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -1,44 +1,42 @@ #pragma once -#include -#include - +#include +#include namespace Poco::Util { class AbstractConfiguration; } - namespace DB { class ASTStorage; +struct DistributedSettingsImpl; -#define LIST_OF_DISTRIBUTED_SETTINGS(M, ALIAS) \ - M(Bool, fsync_after_insert, false, "Do fsync for every inserted. Will decreases performance of inserts (only for background INSERT, i.e. distributed_foreground_insert=false)", 0) \ - M(Bool, fsync_directories, false, "Do fsync for temporary directory (that is used for background INSERT only) after all part operations (writes, renames, etc.).", 0) \ - /** This is the distributed version of the skip_unavailable_shards setting available in src/Core/Settings.cpp */ \ - M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \ - /** Inserts settings. 
*/ \ - M(UInt64, bytes_to_throw_insert, 0, "If more than this number of compressed bytes will be pending for background INSERT, an exception will be thrown. 0 - do not throw.", 0) \ - M(UInt64, bytes_to_delay_insert, 0, "If more than this number of compressed bytes will be pending for background INSERT, the query will be delayed. 0 - do not delay.", 0) \ - M(UInt64, max_delay_to_insert, 60, "Max delay of inserting data into Distributed table in seconds, if there are a lot of pending bytes for background send.", 0) \ - /** Async INSERT settings */ \ - M(UInt64, background_insert_batch, 0, "Default - distributed_background_insert_batch", 0) ALIAS(monitor_batch_inserts) \ - M(UInt64, background_insert_split_batch_on_failure, 0, "Default - distributed_background_insert_split_batch_on_failure", 0) ALIAS(monitor_split_batch_on_failure) \ - M(Milliseconds, background_insert_sleep_time_ms, 0, "Default - distributed_background_insert_sleep_time_ms", 0) ALIAS(monitor_sleep_time_ms) \ - M(Milliseconds, background_insert_max_sleep_time_ms, 0, "Default - distributed_background_insert_max_sleep_time_ms", 0) ALIAS(monitor_max_sleep_time_ms) \ - M(Bool, flush_on_detach, true, "Flush data to remote nodes on DETACH/DROP/server shutdown", 0) \ - -DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) +/// List of available types supported in DistributedSettings object +#define DISTRIBUTED_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, Milliseconds) \ + M(CLASS_NAME, UInt64) +DISTRIBUTED_SETTINGS_SUPPORTED_TYPES(DistributedSettings, DECLARE_SETTING_TRAIT) /** Settings for the Distributed family of engines. 
*/ -struct DistributedSettings : public BaseSettings +struct DistributedSettings { + DistributedSettings(); + DistributedSettings(const DistributedSettings & settings); + DistributedSettings(DistributedSettings && settings) noexcept; + ~DistributedSettings(); + + DISTRIBUTED_SETTINGS_SUPPORTED_TYPES(DistributedSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); void loadFromQuery(ASTStorage & storage_def); + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 5bc3fcc5be3..934bf04d696 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -34,6 +35,8 @@ #include #include +#include + #include @@ -60,7 +63,6 @@ namespace Setting extern const SettingsBool allow_suspicious_codecs; extern const SettingsMilliseconds distributed_background_insert_sleep_time_ms; extern const SettingsBool distributed_insert_skip_read_only_replicas; - extern const SettingsBool enable_deflate_qpl_codec; extern const SettingsBool enable_zstd_qat_codec; extern const SettingsBool insert_allow_materialized_columns; extern const SettingsBool insert_distributed_one_random_shard; @@ -75,6 +77,12 @@ namespace Setting extern const SettingsBool use_compact_format_in_distributed_parts_names; } +namespace DistributedSetting +{ + extern const DistributedSettingsBool fsync_after_insert; + extern const DistributedSettingsBool fsync_directories; +} + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -785,8 +793,8 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const const auto & settings = context->getSettingsRef(); const auto & distributed_settings = storage.getDistributedSettingsRef(); - bool fsync = distributed_settings.fsync_after_insert; - bool 
dir_fsync = distributed_settings.fsync_directories; + bool fsync = distributed_settings[DistributedSetting::fsync_after_insert]; + bool dir_fsync = distributed_settings[DistributedSetting::fsync_directories]; std::string compression_method = Poco::toUpper(settings[Setting::network_compression_method].toString()); std::optional compression_level; @@ -799,7 +807,6 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const compression_level, !settings[Setting::allow_suspicious_codecs], settings[Setting::allow_experimental_codecs], - settings[Setting::enable_deflate_qpl_codec], settings[Setting::enable_zstd_qat_codec]); CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level); diff --git a/src/Storages/ExecutableSettings.cpp b/src/Storages/ExecutableSettings.cpp index d00e4098181..fe5ad0ae7ed 100644 --- a/src/Storages/ExecutableSettings.cpp +++ b/src/Storages/ExecutableSettings.cpp @@ -1,9 +1,9 @@ -#include "ExecutableSettings.h" - #include +#include #include #include #include +#include #include namespace DB @@ -14,15 +14,67 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +#define LIST_OF_EXECUTABLE_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, send_chunk_header, false, "Send number_of_rows\n before sending chunk to process.", 0) \ + DECLARE(UInt64, pool_size, 16, "Processes pool size. 
If size == 0, then there is no size restrictions.", 0) \ + DECLARE(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ + DECLARE(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ + DECLARE(UInt64, command_read_timeout, 10000, "Timeout for reading data from command stdout in milliseconds.", 0) \ + DECLARE(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) \ + DECLARE(ExternalCommandStderrReaction, stderr_reaction, ExternalCommandStderrReaction::NONE, "Reaction when external command outputs data to its stderr.", 0) \ + DECLARE(Bool, check_exit_code, false, "Throw exception if the command exited with non-zero status code.", 0) \ + +DECLARE_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) +struct ExecutableSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) ExecutableSettings##TYPE NAME = &ExecutableSettingsImpl ::NAME; + +namespace ExecutableSetting +{ +LIST_OF_EXECUTABLE_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +ExecutableSettings::ExecutableSettings() + : script_name({}) + , script_arguments({}) + , is_executable_pool(false) + , impl(std::make_unique()) +{ +} + +ExecutableSettings::ExecutableSettings(const ExecutableSettings & settings) + : script_name(settings.script_name) + , script_arguments(settings.script_arguments) + , is_executable_pool(settings.is_executable_pool) + , impl(std::make_unique(*settings.impl)) +{ +} + +ExecutableSettings::ExecutableSettings(ExecutableSettings && settings) noexcept + : script_name(std::move(settings.script_name)) + , script_arguments(std::move(settings.script_arguments)) + , is_executable_pool(settings.is_executable_pool) + , impl(std::make_unique(std::move(*settings.impl))) +{ +} + 
+ExecutableSettings::~ExecutableSettings() = default; + +EXECUTABLE_SETTINGS_SUPPORTED_TYPES(ExecutableSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void ExecutableSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { @@ -39,4 +91,8 @@ void ExecutableSettings::loadFromQuery(ASTStorage & storage_def) } } +void ExecutableSettings::applyChanges(const SettingsChanges & changes) +{ + impl->applyChanges(changes); +} } diff --git a/src/Storages/ExecutableSettings.h b/src/Storages/ExecutableSettings.h index 95627f08d16..4fdf605e9dd 100644 --- a/src/Storages/ExecutableSettings.h +++ b/src/Storages/ExecutableSettings.h @@ -1,35 +1,42 @@ #pragma once -#include -#include +#include #include +#include namespace DB { class ASTStorage; +class SettingsChanges; +struct ExecutableSettingsImpl; -#define LIST_OF_EXECUTABLE_SETTINGS(M, ALIAS) \ - M(Bool, send_chunk_header, false, "Send number_of_rows\n before sending chunk to process.", 0) \ - M(UInt64, pool_size, 16, "Processes pool size. 
If size == 0, then there is no size restrictions.", 0) \ - M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ - M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ - M(UInt64, command_read_timeout, 10000, "Timeout for reading data from command stdout in milliseconds.", 0) \ - M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) \ - M(ExternalCommandStderrReaction, stderr_reaction, ExternalCommandStderrReaction::NONE, "Reaction when external command outputs data to its stderr.", 0) \ - M(Bool, check_exit_code, false, "Throw exception if the command exited with non-zero status code.", 0) \ +#define EXECUTABLE_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, ExternalCommandStderrReaction) \ + M(CLASS_NAME, UInt64) -DECLARE_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) +EXECUTABLE_SETTINGS_SUPPORTED_TYPES(ExecutableSettings, DECLARE_SETTING_TRAIT) /// Settings for ExecutablePool engine. 
-struct ExecutableSettings : public BaseSettings +struct ExecutableSettings { std::string script_name; std::vector script_arguments; - bool is_executable_pool = false; + ExecutableSettings(); + ExecutableSettings(const ExecutableSettings & settings); + ExecutableSettings(ExecutableSettings && settings) noexcept; + ~ExecutableSettings(); + + EXECUTABLE_SETTINGS_SUPPORTED_TYPES(ExecutableSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); + void applyChanges(const SettingsChanges & changes); + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/FileLog/DirectoryWatcherBase.cpp b/src/Storages/FileLog/DirectoryWatcherBase.cpp index 338de7a1288..a83d53de48a 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.cpp +++ b/src/Storages/FileLog/DirectoryWatcherBase.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,13 @@ namespace ErrorCodes extern const int IO_SETUP_ERROR; } +namespace FileLogSetting +{ + extern const FileLogSettingsUInt64 poll_directory_watch_events_backoff_factor; + extern const FileLogSettingsMilliseconds poll_directory_watch_events_backoff_init; + extern const FileLogSettingsMilliseconds poll_directory_watch_events_backoff_max; +} + static constexpr int buffer_size = 4096; DirectoryWatcherBase::DirectoryWatcherBase( @@ -26,7 +34,7 @@ DirectoryWatcherBase::DirectoryWatcherBase( , owner(owner_) , path(path_) , event_mask(event_mask_) - , milliseconds_to_wait(owner.storage.getFileLogSettings()->poll_directory_watch_events_backoff_init.totalMilliseconds()) + , milliseconds_to_wait((*owner.storage.getFileLogSettings())[FileLogSetting::poll_directory_watch_events_backoff_init].totalMilliseconds()) { if (!std::filesystem::exists(path)) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Path {} does not exist", path); @@ -77,7 +85,7 @@ void DirectoryWatcherBase::watchFunc() const auto & settings = owner.storage.getFileLogSettings(); if (poll(pfds, 2, 
static_cast(milliseconds_to_wait)) > 0 && pfds[0].revents & POLLIN) { - milliseconds_to_wait = settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); + milliseconds_to_wait = (*settings)[FileLogSetting::poll_directory_watch_events_backoff_init].totalMilliseconds(); ssize_t n = read(inotify_fd, buffer.data(), buffer.size()); int i = 0; if (n > 0) @@ -125,8 +133,8 @@ void DirectoryWatcherBase::watchFunc() } else { - if (milliseconds_to_wait < static_cast(settings->poll_directory_watch_events_backoff_max.totalMilliseconds())) - milliseconds_to_wait *= settings->poll_directory_watch_events_backoff_factor.value; + if (milliseconds_to_wait < static_cast((*settings)[FileLogSetting::poll_directory_watch_events_backoff_max].totalMilliseconds())) + milliseconds_to_wait *= (*settings)[FileLogSetting::poll_directory_watch_events_backoff_factor].value; } } } diff --git a/src/Storages/FileLog/FileLogSettings.cpp b/src/Storages/FileLog/FileLogSettings.cpp index cf2d6b64d7c..d8897278bf5 100644 --- a/src/Storages/FileLog/FileLogSettings.cpp +++ b/src/Storages/FileLog/FileLogSettings.cpp @@ -1,10 +1,11 @@ +#include +#include +#include #include #include #include #include #include -#include - namespace DB { @@ -15,15 +16,61 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; } +#define FILELOG_RELATED_SETTINGS(DECLARE, ALIAS) \ + /* default is stream_poll_timeout_ms */ \ + DECLARE(Milliseconds, poll_timeout_ms, 0, "Timeout for single poll from StorageFileLog.", 0) \ + DECLARE(UInt64, poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single StorageFileLog poll.", 0) \ + DECLARE(UInt64, max_block_size, 0, "Number of row collected by poll(s) for flushing data from StorageFileLog.", 0) \ + DECLARE(MaxThreads, max_threads, 0, "Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4)", 0) \ + DECLARE(Milliseconds, poll_directory_watch_events_backoff_init, 500, "The initial sleep value 
for watch directory thread.", 0) \ + DECLARE(Milliseconds, poll_directory_watch_events_backoff_max, 32000, "The max sleep value for watch directory thread.", 0) \ + DECLARE(UInt64, poll_directory_watch_events_backoff_factor, 2, "The speed of backoff, exponential by default", 0) \ + DECLARE(StreamingHandleErrorMode, handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for FileLog engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ + +#define LIST_OF_FILELOG_SETTINGS(M, ALIAS) \ + FILELOG_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(FileLogSettingsTraits, LIST_OF_FILELOG_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(FileLogSettingsTraits, LIST_OF_FILELOG_SETTINGS) +struct FileLogSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) FileLogSettings##TYPE NAME = &FileLogSettingsImpl ::NAME; + +namespace FileLogSetting +{ +LIST_OF_FILELOG_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +FileLogSettings::FileLogSettings() : impl(std::make_unique()) +{ +} + +FileLogSettings::FileLogSettings(const FileLogSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +FileLogSettings::FileLogSettings(FileLogSettings && settings) noexcept + : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +FileLogSettings::~FileLogSettings() = default; + +FILELOG_SETTINGS_SUPPORTED_TYPES(FileLogSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void FileLogSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { @@ -41,8 +88,9 @@ void FileLogSettings::loadFromQuery(ASTStorage & storage_def) /// Check that batch size 
is not too high (the same as we check setting max_block_size). constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32 - if (poll_max_batch_size > max_sane_block_rows_size) - throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Sanity check: 'poll_max_batch_size' value is too high ({})", poll_max_batch_size); + if (impl->poll_max_batch_size > max_sane_block_rows_size) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, "Sanity check: 'poll_max_batch_size' value is too high ({})", impl->poll_max_batch_size); } } diff --git a/src/Storages/FileLog/FileLogSettings.h b/src/Storages/FileLog/FileLogSettings.h index fd20dea702a..cc761925347 100644 --- a/src/Storages/FileLog/FileLogSettings.h +++ b/src/Storages/FileLog/FileLogSettings.h @@ -1,38 +1,60 @@ #pragma once -#include -#include +#include #include +#include namespace DB { class ASTStorage; +struct FileLogSettingsImpl; +/// List of available types supported in FileLogSettings object +#define FILELOG_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, ArrowCompression) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, CapnProtoEnumComparingMode) \ + M(CLASS_NAME, Char) \ + M(CLASS_NAME, DateTimeInputFormat) \ + M(CLASS_NAME, DateTimeOutputFormat) \ + M(CLASS_NAME, DateTimeOverflowBehavior) \ + M(CLASS_NAME, Double) \ + M(CLASS_NAME, EscapingRule) \ + M(CLASS_NAME, Float) \ + M(CLASS_NAME, IdentifierQuotingRule) \ + M(CLASS_NAME, IdentifierQuotingStyle) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, IntervalOutputFormat) \ + M(CLASS_NAME, MaxThreads) \ + M(CLASS_NAME, Milliseconds) \ + M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, ORCCompression) \ + M(CLASS_NAME, ParquetCompression) \ + M(CLASS_NAME, ParquetVersion) \ + M(CLASS_NAME, SchemaInferenceMode) \ + M(CLASS_NAME, StreamingHandleErrorMode) \ + M(CLASS_NAME, String) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, UInt64Auto) \ + M(CLASS_NAME, URI) -#define FILELOG_RELATED_SETTINGS(M, ALIAS) \ - /* default is stream_poll_timeout_ms */ \ - 
M(Milliseconds, poll_timeout_ms, 0, "Timeout for single poll from StorageFileLog.", 0) \ - M(UInt64, poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single StorageFileLog poll.", 0) \ - M(UInt64, max_block_size, 0, "Number of row collected by poll(s) for flushing data from StorageFileLog.", 0) \ - M(UInt64, max_threads, 0, "Number of max threads to parse files, default is 0, which means the number will be max(1, physical_cpu_cores / 4)", 0) \ - M(Milliseconds, poll_directory_watch_events_backoff_init, 500, "The initial sleep value for watch directory thread.", 0) \ - M(Milliseconds, poll_directory_watch_events_backoff_max, 32000, "The max sleep value for watch directory thread.", 0) \ - M(UInt64, poll_directory_watch_events_backoff_factor, 2, "The speed of backoff, exponential by default", 0) \ - M(StreamingHandleErrorMode, handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for FileLog engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ - -#define LIST_OF_FILELOG_SETTINGS(M, ALIAS) \ - FILELOG_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(FileLogSettingsTraits, LIST_OF_FILELOG_SETTINGS) - +FILELOG_SETTINGS_SUPPORTED_TYPES(FileLogSettings, DECLARE_SETTING_TRAIT) /** Settings for the FileLog engine. * Could be loaded from a CREATE TABLE query (SETTINGS clause). 
*/ -struct FileLogSettings : public BaseSettings +struct FileLogSettings { - void loadFromQuery(ASTStorage & storage_def); -}; + FileLogSettings(); + FileLogSettings(const FileLogSettings & settings); + FileLogSettings(FileLogSettings && settings) noexcept; + ~FileLogSettings(); + FILELOG_SETTINGS_SUPPORTED_TYPES(FileLogSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + + void loadFromQuery(ASTStorage & storage_def); + +private: + std::unique_ptr impl; +}; } diff --git a/src/Storages/FileLog/FileLogSource.h b/src/Storages/FileLog/FileLogSource.h index 3ac2b407e10..c29b4539152 100644 --- a/src/Storages/FileLog/FileLogSource.h +++ b/src/Storages/FileLog/FileLogSource.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index df3e3a710a2..873393cff69 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,18 @@ namespace Setting extern const SettingsBool use_concurrency_control; } +namespace FileLogSetting +{ + extern const FileLogSettingsStreamingHandleErrorMode handle_error_mode; + extern const FileLogSettingsUInt64 max_block_size; + extern const FileLogSettingsMaxThreads max_threads; + extern const FileLogSettingsUInt64 poll_directory_watch_events_backoff_factor; + extern const FileLogSettingsMilliseconds poll_directory_watch_events_backoff_init; + extern const FileLogSettingsMilliseconds poll_directory_watch_events_backoff_max; + extern const FileLogSettingsUInt64 poll_max_batch_size; + extern const FileLogSettingsMilliseconds poll_timeout_ms; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -103,7 +116,7 @@ private: auto modified_context = Context::createCopy(getContext()); - auto max_streams_number = std::min(file_log.filelog_settings->max_threads, file_log.file_infos.file_names.size()); + 
auto max_streams_number = std::min((*file_log.filelog_settings)[FileLogSetting::max_threads], file_log.file_infos.file_names.size()); /// Each stream responsible for closing it's files and store meta file_log.openFilesAndSetPos(); @@ -121,7 +134,7 @@ private: file_log.getPollTimeoutMillisecond(), stream_number, max_streams_number, - file_log.filelog_settings->handle_error_mode)); + (*file_log.filelog_settings)[FileLogSetting::handle_error_mode])); } return Pipe::unitePipes(std::move(pipes)); @@ -150,13 +163,13 @@ StorageFileLog::StorageFileLog( , format_name(format_name_) , log(getLogger("StorageFileLog (" + table_id_.table_name + ")")) , disk(getContext()->getStoragePolicy("default")->getDisks().at(0)) - , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) + , milliseconds_to_wait((*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_init].totalMilliseconds()) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(createVirtuals(filelog_settings->handle_error_mode)); + setVirtuals(createVirtuals((*filelog_settings)[FileLogSetting::handle_error_mode])); if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { @@ -583,20 +596,20 @@ StorageFileLog::ReadMetadataResult StorageFileLog::readMetadata(const String & f size_t StorageFileLog::getMaxBlockSize() const { - return filelog_settings->max_block_size.changed ? filelog_settings->max_block_size.value + return (*filelog_settings)[FileLogSetting::max_block_size].changed ? (*filelog_settings)[FileLogSetting::max_block_size].value : getContext()->getSettingsRef()[Setting::max_insert_block_size].value; } size_t StorageFileLog::getPollMaxBatchSize() const { - size_t batch_size = filelog_settings->poll_max_batch_size.changed ? 
filelog_settings->poll_max_batch_size.value + size_t batch_size = (*filelog_settings)[FileLogSetting::poll_max_batch_size].changed ? (*filelog_settings)[FileLogSetting::poll_max_batch_size].value : getContext()->getSettingsRef()[Setting::max_block_size].value; return std::min(batch_size, getMaxBlockSize()); } size_t StorageFileLog::getPollTimeoutMillisecond() const { - return filelog_settings->poll_timeout_ms.changed ? filelog_settings->poll_timeout_ms.totalMilliseconds() + return (*filelog_settings)[FileLogSetting::poll_timeout_ms].changed ? (*filelog_settings)[FileLogSetting::poll_timeout_ms].totalMilliseconds() : getContext()->getSettingsRef()[Setting::stream_poll_timeout_ms].totalMilliseconds(); } @@ -663,12 +676,12 @@ void StorageFileLog::threadFunc() { LOG_TRACE(log, "Stream stalled. Reschedule."); if (milliseconds_to_wait - < static_cast(filelog_settings->poll_directory_watch_events_backoff_max.totalMilliseconds())) - milliseconds_to_wait *= filelog_settings->poll_directory_watch_events_backoff_factor.value; + < static_cast((*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_max].totalMilliseconds())) + milliseconds_to_wait *= (*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_factor].value; break; } - milliseconds_to_wait = filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); + milliseconds_to_wait = (*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_init].totalMilliseconds(); auto ts = std::chrono::steady_clock::now(); @@ -732,7 +745,7 @@ bool StorageFileLog::streamToViews() auto metadata_snapshot = getInMemoryMetadataPtr(); auto storage_snapshot = getStorageSnapshot(metadata_snapshot, getContext()); - auto max_streams_number = std::min(filelog_settings->max_threads.value, file_infos.file_names.size()); + auto max_streams_number = std::min((*filelog_settings)[FileLogSetting::max_threads].value, file_infos.file_names.size()); /// No files to parse if (max_streams_number 
== 0) { @@ -772,7 +785,7 @@ bool StorageFileLog::streamToViews() getPollTimeoutMillisecond(), stream_number, max_streams_number, - filelog_settings->handle_error_mode)); + (*filelog_settings)[FileLogSetting::handle_error_mode])); } auto input= Pipe::unitePipes(std::move(pipes)); @@ -819,12 +832,12 @@ void registerStorageFileLog(StorageFactory & factory) } auto cpu_cores = getNumberOfCPUCoresToUse(); - auto num_threads = filelog_settings->max_threads.value; + auto num_threads = (*filelog_settings)[FileLogSetting::max_threads]; - if (!num_threads) /// Default + if ((*filelog_settings)[FileLogSetting::max_threads].is_auto) /// Default { num_threads = std::max(1U, cpu_cores / 4); - filelog_settings->set("max_threads", num_threads); + (*filelog_settings)[FileLogSetting::max_threads] = num_threads; } else if (num_threads > cpu_cores) { @@ -835,18 +848,18 @@ void registerStorageFileLog(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be lower than 1"); } - if (filelog_settings->max_block_size.changed && filelog_settings->max_block_size.value < 1) + if ((*filelog_settings)[FileLogSetting::max_block_size].changed && (*filelog_settings)[FileLogSetting::max_block_size].value < 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_max_block_size can not be lower than 1"); } - if (filelog_settings->poll_max_batch_size.changed && filelog_settings->poll_max_batch_size.value < 1) + if ((*filelog_settings)[FileLogSetting::poll_max_batch_size].changed && (*filelog_settings)[FileLogSetting::poll_max_batch_size].value < 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_poll_max_batch_size can not be lower than 1"); } - size_t init_sleep_time = filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); - size_t max_sleep_time = filelog_settings->poll_directory_watch_events_backoff_max.totalMilliseconds(); + size_t init_sleep_time = 
(*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_init].totalMilliseconds(); + size_t max_sleep_time = (*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_max].totalMilliseconds(); if (init_sleep_time > max_sleep_time) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -854,8 +867,8 @@ void registerStorageFileLog(StorageFactory & factory) "be greater than poll_directory_watch_events_backoff_max"); } - if (filelog_settings->poll_directory_watch_events_backoff_factor.changed - && !filelog_settings->poll_directory_watch_events_backoff_factor.value) + if ((*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_factor].changed + && !(*filelog_settings)[FileLogSetting::poll_directory_watch_events_backoff_factor].value) throw Exception(ErrorCodes::BAD_ARGUMENTS, "poll_directory_watch_events_backoff_factor can not be 0"); if (args_count != 2) diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 5ce2a0eae51..f485d576613 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -4,9 +4,9 @@ #include #include -#include #include +#include #include #include @@ -25,6 +25,7 @@ namespace ErrorCodes } class FileLogDirectoryWatcher; +struct FileLogSettings; class StorageFileLog final : public IStorage, WithContext { diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 64e47b51a6d..93e7ae69714 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -17,12 +17,20 @@ #include #include #include +#include #include #include namespace DB { +namespace HiveSetting +{ + extern const HiveSettingsBool enable_orc_file_minmax_index; + extern const HiveSettingsBool enable_orc_stripe_minmax_index; + extern const HiveSettingsBool enable_parquet_rowgroup_minmax_index; +} + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -181,7 +189,7 @@ void HiveORCFile::prepareColumnMapping() bool 
HiveORCFile::useFileMinMaxIndex() const { - return storage_settings->enable_orc_file_minmax_index; + return (*storage_settings)[HiveSetting::enable_orc_file_minmax_index]; } @@ -231,7 +239,7 @@ void HiveORCFile::loadFileMinMaxIndexImpl() bool HiveORCFile::useSplitMinMaxIndex() const { - return storage_settings->enable_orc_stripe_minmax_index; + return (*storage_settings)[HiveSetting::enable_orc_stripe_minmax_index]; } @@ -272,7 +280,7 @@ std::optional HiveORCFile::getRowsImpl() bool HiveParquetFile::useSplitMinMaxIndex() const { - return storage_settings->enable_parquet_rowgroup_minmax_index; + return (*storage_settings)[HiveSetting::enable_parquet_rowgroup_minmax_index]; } void HiveParquetFile::prepareReader() diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index a9468ce7d3d..2d7b39497c0 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -13,7 +13,6 @@ #include #include #include -#include #include namespace orc @@ -24,6 +23,8 @@ class ColumnStatistics; namespace DB { +struct HiveSettings; + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; diff --git a/src/Storages/Hive/HiveSettings.cpp b/src/Storages/Hive/HiveSettings.cpp index 9519ce8b1f5..74e203b635e 100644 --- a/src/Storages/Hive/HiveSettings.cpp +++ b/src/Storages/Hive/HiveSettings.cpp @@ -2,10 +2,15 @@ #if USE_HIVE -#include -#include +#include +#include +#include #include #include +#include +#include + +#include namespace DB { @@ -14,8 +19,48 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +#define HIVE_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Char, hive_text_field_delimeter, '\x01', "How to split one row of hive data with format text", 0) \ + DECLARE(Bool, enable_orc_stripe_minmax_index, false, "Enable using ORC stripe level minmax index.", 0) \ + DECLARE(Bool, enable_parquet_rowgroup_minmax_index, false, "Enable using Parquet row-group level minmax index.", 0) \ + DECLARE(Bool, enable_orc_file_minmax_index, true, "Enable using 
ORC file level minmax index.", 0) + +#define LIST_OF_HIVE_SETTINGS(M, ALIAS) \ + HIVE_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(HiveSettingsTraits, LIST_OF_HIVE_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(HiveSettingsTraits, LIST_OF_HIVE_SETTINGS) +struct HiveSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) HiveSettings##TYPE NAME = &HiveSettingsImpl ::NAME; + +namespace HiveSetting +{ +LIST_OF_HIVE_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +HiveSettings::HiveSettings() : impl(std::make_unique()) +{ +} + +HiveSettings::HiveSettings(const HiveSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +HiveSettings::HiveSettings(HiveSettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +HiveSettings::~HiveSettings() = default; + +HIVE_SETTINGS_SUPPORTED_TYPES(HiveSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void HiveSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) { if (!config.has(config_elem)) @@ -27,7 +72,7 @@ void HiveSettings::loadFromConfig(const String & config_elem, const Poco::Util:: try { for (const String & key : config_keys) - set(key, config.getString(config_elem + "." + key)); + impl->set(key, config.getString(config_elem + "." 
+ key)); } catch (Exception & e) { @@ -43,7 +88,7 @@ void HiveSettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/src/Storages/Hive/HiveSettings.h b/src/Storages/Hive/HiveSettings.h index 90156007f42..9e70d1b71c9 100644 --- a/src/Storages/Hive/HiveSettings.h +++ b/src/Storages/Hive/HiveSettings.h @@ -4,36 +4,65 @@ #if USE_HIVE -#include -#include -#include +#include #include +#include + +namespace Poco::Util +{ +class AbstractConfiguration; +} namespace DB { class ASTStorage; +struct HiveSettingsImpl; -#define HIVE_RELATED_SETTINGS(M, ALIAS) \ - M(Char, hive_text_field_delimeter, '\x01', "How to split one row of hive data with format text", 0) \ - M(Bool, enable_orc_stripe_minmax_index, false, "Enable using ORC stripe level minmax index.", 0) \ - M(Bool, enable_parquet_rowgroup_minmax_index, false, "Enable using Parquet row-group level minmax index.", 0) \ - M(Bool, enable_orc_file_minmax_index, true, "Enable using ORC file level minmax index.", 0) - -#define LIST_OF_HIVE_SETTINGS(M, ALIAS) \ - HIVE_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(HiveSettingsTraits, LIST_OF_HIVE_SETTINGS) +/// List of available types supported in HiveSettings object +#define HIVE_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, ArrowCompression) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, CapnProtoEnumComparingMode) \ + M(CLASS_NAME, Char) \ + M(CLASS_NAME, DateTimeInputFormat) \ + M(CLASS_NAME, DateTimeOutputFormat) \ + M(CLASS_NAME, DateTimeOverflowBehavior) \ + M(CLASS_NAME, Double) \ + M(CLASS_NAME, EscapingRule) \ + M(CLASS_NAME, Float) \ + M(CLASS_NAME, IdentifierQuotingRule) \ + M(CLASS_NAME, IdentifierQuotingStyle) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, IntervalOutputFormat) \ + M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, ORCCompression) \ + M(CLASS_NAME, 
ParquetCompression) \ + M(CLASS_NAME, ParquetVersion) \ + M(CLASS_NAME, SchemaInferenceMode) \ + M(CLASS_NAME, String) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, UInt64Auto) \ + M(CLASS_NAME, URI) +HIVE_SETTINGS_SUPPORTED_TYPES(HiveSettings, DECLARE_SETTING_TRAIT) /** Settings for the Hive engine. * Could be loaded from a CREATE TABLE query (SETTINGS clause). */ -class HiveSettings : public BaseSettings +struct HiveSettings { -public: + HiveSettings(); + HiveSettings(const HiveSettings & settings); + HiveSettings(HiveSettings && settings) noexcept; + ~HiveSettings(); + + HIVE_SETTINGS_SUPPORTED_TYPES(HiveSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); void loadFromQuery(ASTStorage & storage_def); + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index e16df22e138..445e8ad765f 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -16,7 +16,7 @@ namespace DB { -class HiveSettings; +struct HiveSettings; /** * This class represents table engine for external hdfs files. * Read method is supported for now. diff --git a/src/Storages/Kafka/KafkaSettings.cpp b/src/Storages/Kafka/KafkaSettings.cpp index 8e6883736dd..c32cfdf31bf 100644 --- a/src/Storages/Kafka/KafkaSettings.cpp +++ b/src/Storages/Kafka/KafkaSettings.cpp @@ -1,8 +1,12 @@ -#include +#include +#include +#include #include -#include #include +#include +#include #include +#include namespace DB @@ -14,15 +18,84 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +#define KAFKA_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(String, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.", 0) \ + DECLARE(String, kafka_topic_list, "", "A list of Kafka topics.", 0) \ + DECLARE(String, kafka_group_name, "", "Client group id string. 
All Kafka consumers sharing the same group.id belong to the same group.", 0) \ + /* those are mapped to format factory settings */ \ + DECLARE(String, kafka_format, "", "The message format for Kafka engine.", 0) \ + DECLARE(String, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine", 0) \ + DECLARE(UInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ + /* default is = max_insert_block_size / kafka_num_consumers */ \ + DECLARE(UInt64, kafka_max_block_size, 0, "Number of row collected by poll(s) for flushing data from Kafka.", 0) \ + DECLARE(UInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block", 0) \ + DECLARE(Bool, kafka_commit_every_batch, false, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) \ + DECLARE(String, kafka_client_id, "", "Client identifier.", 0) \ + /* default is stream_poll_timeout_ms */ \ + DECLARE(Milliseconds, kafka_poll_timeout_ms, 0, "Timeout for single poll from Kafka.", 0) \ + DECLARE(UInt64, kafka_poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single Kafka poll.", 0) \ + DECLARE(UInt64, kafka_consumers_pool_ttl_ms, 60'000, "TTL for Kafka consumers (in milliseconds)", 0) \ + /* default is stream_flush_interval_ms */ \ + DECLARE(Milliseconds, kafka_flush_interval_ms, 0, "Timeout for flushing data from Kafka.", 0) \ + DECLARE(Bool, kafka_thread_per_consumer, false, "Provide independent thread for each consumer", 0) \ + DECLARE(StreamingHandleErrorMode, kafka_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for Kafka engine. 
Possible values: default (throw an exception after kafka_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ + DECLARE(Bool, kafka_commit_on_select, false, "Commit messages when select query is made", 0) \ + DECLARE(UInt64, kafka_max_rows_per_message, 1, "The maximum number of rows produced in one kafka message for row-based formats.", 0) \ + DECLARE(String, kafka_keeper_path, "", "The path to the table in ClickHouse Keeper", 0) \ + DECLARE(String, kafka_replica_name, "", "The replica name in ClickHouse Keeper", 0) \ + +#define OBSOLETE_KAFKA_SETTINGS(M, ALIAS) \ + MAKE_OBSOLETE(M, Char, kafka_row_delimiter, '\0') \ + + /** TODO: */ + /* https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md */ + /* https://github.com/edenhill/librdkafka/blob/v1.4.2/src/rdkafka_conf.c */ + +#define LIST_OF_KAFKA_SETTINGS(M, ALIAS) \ + KAFKA_RELATED_SETTINGS(M, ALIAS) \ + OBSOLETE_KAFKA_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ + +DECLARE_SETTINGS_TRAITS(KafkaSettingsTraits, LIST_OF_KAFKA_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(KafkaSettingsTraits, LIST_OF_KAFKA_SETTINGS) +struct KafkaSettingsImpl : public BaseSettings +{ +}; + + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) KafkaSettings##TYPE NAME = &KafkaSettingsImpl ::NAME; + +namespace KafkaSetting +{ +LIST_OF_KAFKA_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +KafkaSettings::KafkaSettings() : impl(std::make_unique()) +{ +} + +KafkaSettings::KafkaSettings(const KafkaSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +KafkaSettings::KafkaSettings(KafkaSettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +KafkaSettings::~KafkaSettings() = default; + +KAFKA_SETTINGS_SUPPORTED_TYPES(KafkaSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void KafkaSettings::loadFromQuery(ASTStorage & storage_def) { 
if (storage_def.settings) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { @@ -39,15 +112,46 @@ void KafkaSettings::loadFromQuery(ASTStorage & storage_def) } } +void KafkaSettings::loadFromNamedCollection(const MutableNamedCollectionPtr & named_collection) +{ + for (const auto & setting : impl->all()) + { + const auto & setting_name = setting.getName(); + if (named_collection->has(setting_name)) + impl->set(setting_name, named_collection->get(setting_name)); + } +} + void KafkaSettings::sanityCheck() const { - if (kafka_consumers_pool_ttl_ms < KAFKA_RESCHEDULE_MS) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'kafka_consumers_pool_ttl_ms' ({}) cannot be less then rescheduled interval ({})", - kafka_consumers_pool_ttl_ms, KAFKA_RESCHEDULE_MS); + if (impl->kafka_consumers_pool_ttl_ms < KAFKA_RESCHEDULE_MS) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The value of 'kafka_consumers_pool_ttl_ms' ({}) cannot be less then rescheduled interval ({})", + impl->kafka_consumers_pool_ttl_ms, + KAFKA_RESCHEDULE_MS); - if (kafka_consumers_pool_ttl_ms > KAFKA_CONSUMERS_POOL_TTL_MS_MAX) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of 'kafka_consumers_pool_ttl_ms' ({}) cannot be too big (greater then {}), since this may cause live memory leaks", - kafka_consumers_pool_ttl_ms, KAFKA_CONSUMERS_POOL_TTL_MS_MAX); + if (impl->kafka_consumers_pool_ttl_ms > KAFKA_CONSUMERS_POOL_TTL_MS_MAX) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "The value of 'kafka_consumers_pool_ttl_ms' ({}) cannot be too big (greater then {}), since this may cause live memory leaks", + impl->kafka_consumers_pool_ttl_ms, + KAFKA_CONSUMERS_POOL_TTL_MS_MAX); } +SettingsChanges KafkaSettings::getFormatSettings() const +{ + SettingsChanges values; + + for (const auto & setting : *impl) + { + const auto & setting_name = setting.getName(); + + /// check for non-kafka-related settings + if 
(!setting_name.starts_with("kafka_")) + values.emplace_back(setting_name, setting.getValue()); + } + + return values; +} } diff --git a/src/Storages/Kafka/KafkaSettings.h b/src/Storages/Kafka/KafkaSettings.h index 6cf881634ad..8436b420e31 100644 --- a/src/Storages/Kafka/KafkaSettings.h +++ b/src/Storages/Kafka/KafkaSettings.h @@ -1,14 +1,15 @@ #pragma once -#include -#include +#include #include -#include - +#include +#include +#include namespace DB { class ASTStorage; +struct KafkaSettingsImpl; const auto KAFKA_RESCHEDULE_MS = 500; const auto KAFKA_CLEANUP_TIMEOUT_MS = 3000; @@ -17,55 +18,57 @@ const auto KAFKA_MAX_THREAD_WORK_DURATION_MS = 60000; // 10min const auto KAFKA_CONSUMERS_POOL_TTL_MS_MAX = 600'000; -#define KAFKA_RELATED_SETTINGS(M, ALIAS) \ - M(String, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.", 0) \ - M(String, kafka_topic_list, "", "A list of Kafka topics.", 0) \ - M(String, kafka_group_name, "", "Client group id string. All Kafka consumers sharing the same group.id belong to the same group.", 0) \ - /* those are mapped to format factory settings */ \ - M(String, kafka_format, "", "The message format for Kafka engine.", 0) \ - M(String, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine", 0) \ - M(UInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.", 0) \ - /* default is = max_insert_block_size / kafka_num_consumers */ \ - M(UInt64, kafka_max_block_size, 0, "Number of row collected by poll(s) for flushing data from Kafka.", 0) \ - M(UInt64, kafka_skip_broken_messages, 0, "Skip at least this number of broken messages from Kafka topic per block", 0) \ - M(Bool, kafka_commit_every_batch, false, "Commit every consumed and handled batch instead of a single commit after writing a whole block", 0) \ - M(String, kafka_client_id, "", "Client identifier.", 0) \ - /* default is stream_poll_timeout_ms */ \ - M(Milliseconds, kafka_poll_timeout_ms, 0, "Timeout for 
single poll from Kafka.", 0) \ - M(UInt64, kafka_poll_max_batch_size, 0, "Maximum amount of messages to be polled in a single Kafka poll.", 0) \ - M(UInt64, kafka_consumers_pool_ttl_ms, 60'000, "TTL for Kafka consumers (in milliseconds)", 0) \ - /* default is stream_flush_interval_ms */ \ - M(Milliseconds, kafka_flush_interval_ms, 0, "Timeout for flushing data from Kafka.", 0) \ - M(Bool, kafka_thread_per_consumer, false, "Provide independent thread for each consumer", 0) \ - M(StreamingHandleErrorMode, kafka_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for Kafka engine. Possible values: default (throw an exception after rabbitmq_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ - M(Bool, kafka_commit_on_select, false, "Commit messages when select query is made", 0) \ - M(UInt64, kafka_max_rows_per_message, 1, "The maximum number of rows produced in one kafka message for row-based formats.", 0) \ - M(String, kafka_keeper_path, "", "The path to the table in ClickHouse Keeper", 0) \ - M(String, kafka_replica_name, "", "The replica name in ClickHouse Keeper", 0) \ - -#define OBSOLETE_KAFKA_SETTINGS(M, ALIAS) \ - MAKE_OBSOLETE(M, Char, kafka_row_delimiter, '\0') \ - - /** TODO: */ - /* https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md */ - /* https://github.com/edenhill/librdkafka/blob/v1.4.2/src/rdkafka_conf.c */ - -#define LIST_OF_KAFKA_SETTINGS(M, ALIAS) \ - KAFKA_RELATED_SETTINGS(M, ALIAS) \ - OBSOLETE_KAFKA_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ - -DECLARE_SETTINGS_TRAITS(KafkaSettingsTraits, LIST_OF_KAFKA_SETTINGS) +/// List of available types supported in KafkaSettings object +#define KAFKA_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, ArrowCompression) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, CapnProtoEnumComparingMode) \ + M(CLASS_NAME, Char) \ + M(CLASS_NAME, DateTimeInputFormat) \ + 
M(CLASS_NAME, DateTimeOutputFormat) \ + M(CLASS_NAME, DateTimeOverflowBehavior) \ + M(CLASS_NAME, Double) \ + M(CLASS_NAME, EscapingRule) \ + M(CLASS_NAME, Float) \ + M(CLASS_NAME, IdentifierQuotingRule) \ + M(CLASS_NAME, IdentifierQuotingStyle) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, IntervalOutputFormat) \ + M(CLASS_NAME, Milliseconds) \ + M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, ORCCompression) \ + M(CLASS_NAME, ParquetCompression) \ + M(CLASS_NAME, ParquetVersion) \ + M(CLASS_NAME, SchemaInferenceMode) \ + M(CLASS_NAME, StreamingHandleErrorMode) \ + M(CLASS_NAME, String) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, UInt64Auto) \ + M(CLASS_NAME, URI) +KAFKA_SETTINGS_SUPPORTED_TYPES(KafkaSettings, DECLARE_SETTING_TRAIT) /** Settings for the Kafka engine. * Could be loaded from a CREATE TABLE query (SETTINGS clause). */ -struct KafkaSettings : public BaseSettings +struct KafkaSettings { + KafkaSettings(); + KafkaSettings(const KafkaSettings & settings); + KafkaSettings(KafkaSettings && settings) noexcept; + ~KafkaSettings(); + + KAFKA_SETTINGS_SUPPORTED_TYPES(KafkaSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); + void loadFromNamedCollection(const MutableNamedCollectionPtr & named_collection); + + SettingsChanges getFormatSettings() const; void sanityCheck() const; + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 0be0f12a4f1..294e983388e 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -14,10 +13,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -26,9 +25,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -38,15 +34,11 @@ #include #include #include -#include #include -#include -#include 
+#include #include #include -#include -#include namespace CurrentMetrics { @@ -75,6 +67,29 @@ namespace Setting extern const SettingsBool use_concurrency_control; } +namespace KafkaSetting +{ + extern const KafkaSettingsUInt64 input_format_allow_errors_num; + extern const KafkaSettingsFloat input_format_allow_errors_ratio; + extern const KafkaSettingsString kafka_broker_list; + extern const KafkaSettingsString kafka_client_id; + extern const KafkaSettingsBool kafka_commit_every_batch; + extern const KafkaSettingsBool kafka_commit_on_select; + extern const KafkaSettingsUInt64 kafka_consumers_pool_ttl_ms; + extern const KafkaSettingsMilliseconds kafka_flush_interval_ms; + extern const KafkaSettingsString kafka_format; + extern const KafkaSettingsString kafka_group_name; + extern const KafkaSettingsStreamingHandleErrorMode kafka_handle_error_mode; + extern const KafkaSettingsUInt64 kafka_max_block_size; + extern const KafkaSettingsUInt64 kafka_max_rows_per_message; + extern const KafkaSettingsUInt64 kafka_num_consumers; + extern const KafkaSettingsUInt64 kafka_poll_max_batch_size; + extern const KafkaSettingsMilliseconds kafka_poll_timeout_ms; + extern const KafkaSettingsString kafka_schema; + extern const KafkaSettingsBool kafka_thread_per_consumer; + extern const KafkaSettingsString kafka_topic_list; +} + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -132,7 +147,7 @@ private: column_names, kafka_storage.log, 1, - kafka_storage.kafka_settings->kafka_commit_on_select)); + (*kafka_storage.kafka_settings)[KafkaSetting::kafka_commit_on_select])); } LOG_DEBUG(kafka_storage.log, "Starting reading {} streams", pipes.size()); @@ -155,36 +170,36 @@ StorageKafka::StorageKafka( , WithContext(context_->getGlobalContext()) , kafka_settings(std::move(kafka_settings_)) , macros_info{.table_id = table_id_} - , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) - , 
brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) - , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) + , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_topic_list].value, macros_info))) + , brokers(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_broker_list].value, macros_info)) + , group(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_group_name].value, macros_info)) , client_id( - kafka_settings->kafka_client_id.value.empty() + (*kafka_settings)[KafkaSetting::kafka_client_id].value.empty() ? StorageKafkaUtils::getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) - , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) - , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) - , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) - , num_consumers(kafka_settings->kafka_num_consumers.value) + : getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_client_id].value, macros_info)) + , format_name(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_format].value)) + , max_rows_per_message((*kafka_settings)[KafkaSetting::kafka_max_rows_per_message].value) + , schema_name(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_schema].value, macros_info)) + , num_consumers((*kafka_settings)[KafkaSetting::kafka_num_consumers].value) , log(getLogger("StorageKafka (" + table_id_.table_name + ")")) - , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) + , intermediate_commit((*kafka_settings)[KafkaSetting::kafka_commit_every_batch].value) , settings_adjustments(StorageKafkaUtils::createSettingsAdjustments(*kafka_settings, schema_name)) - , 
thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) + , thread_per_consumer((*kafka_settings)[KafkaSetting::kafka_thread_per_consumer].value) , collection_name(collection_name_) { kafka_settings->sanityCheck(); - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) + if ((*kafka_settings)[KafkaSetting::kafka_handle_error_mode] == StreamingHandleErrorMode::STREAM) { - kafka_settings->input_format_allow_errors_num = 0; - kafka_settings->input_format_allow_errors_ratio = 0; + (*kafka_settings)[KafkaSetting::input_format_allow_errors_num] = 0; + (*kafka_settings)[KafkaSetting::input_format_allow_errors_ratio] = 0; } StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(StorageKafkaUtils::createVirtuals(kafka_settings->kafka_handle_error_mode)); + setVirtuals(StorageKafkaUtils::createVirtuals((*kafka_settings)[KafkaSetting::kafka_handle_error_mode])); auto task_count = thread_per_consumer ? 
num_consumers : 1; for (size_t i = 0; i < task_count; ++i) @@ -394,6 +409,10 @@ KafkaConsumerPtr StorageKafka::popConsumer(std::chrono::milliseconds timeout) return ret_consumer_ptr; } +StreamingHandleErrorMode StorageKafka::getStreamingHandleErrorMode() const +{ + return (*kafka_settings)[KafkaSetting::kafka_handle_error_mode]; +} KafkaConsumerPtr StorageKafka::createKafkaConsumer(size_t consumer_number) { @@ -433,7 +452,7 @@ cppkafka::Configuration StorageKafka::getProducerConfiguration() void StorageKafka::cleanConsumers() { - UInt64 ttl_usec = kafka_settings->kafka_consumers_pool_ttl_ms * 1'000; + UInt64 ttl_usec = (*kafka_settings)[KafkaSetting::kafka_consumers_pool_ttl_ms] * 1'000; std::unique_lock lock(mutex); std::chrono::milliseconds timeout(KAFKA_RESCHEDULE_MS); @@ -477,7 +496,7 @@ void StorageKafka::cleanConsumers() lock.lock(); } - ttl_usec = kafka_settings->kafka_consumers_pool_ttl_ms * 1'000; + ttl_usec = (*kafka_settings)[KafkaSetting::kafka_consumers_pool_ttl_ms] * 1'000; } LOG_TRACE(log, "Consumers cleanup thread finished"); @@ -485,13 +504,13 @@ void StorageKafka::cleanConsumers() size_t StorageKafka::getMaxBlockSize() const { - return kafka_settings->kafka_max_block_size.changed ? kafka_settings->kafka_max_block_size.value + return (*kafka_settings)[KafkaSetting::kafka_max_block_size].changed ? (*kafka_settings)[KafkaSetting::kafka_max_block_size].value : (getContext()->getSettingsRef()[Setting::max_insert_block_size].value / num_consumers); } size_t StorageKafka::getPollMaxBatchSize() const { - size_t batch_size = kafka_settings->kafka_poll_max_batch_size.changed ? kafka_settings->kafka_poll_max_batch_size.value + size_t batch_size = (*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].changed ? 
(*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].value : getContext()->getSettingsRef()[Setting::max_block_size].value; return std::min(batch_size,getMaxBlockSize()); @@ -499,7 +518,7 @@ size_t StorageKafka::getPollMaxBatchSize() const size_t StorageKafka::getPollTimeoutMillisecond() const { - return kafka_settings->kafka_poll_timeout_ms.changed ? kafka_settings->kafka_poll_timeout_ms.totalMilliseconds() + return (*kafka_settings)[KafkaSetting::kafka_poll_timeout_ms].changed ? (*kafka_settings)[KafkaSetting::kafka_poll_timeout_ms].totalMilliseconds() : getContext()->getSettingsRef()[Setting::stream_poll_timeout_ms].totalMilliseconds(); } @@ -624,8 +643,8 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL StreamLocalLimits limits; - Poco::Timespan max_execution_time = kafka_settings->kafka_flush_interval_ms.changed - ? kafka_settings->kafka_flush_interval_ms + Poco::Timespan max_execution_time = (*kafka_settings)[KafkaSetting::kafka_flush_interval_ms].changed + ? 
(*kafka_settings)[KafkaSetting::kafka_flush_interval_ms] : getContext()->getSettingsRef()[Setting::stream_flush_interval_ms]; source->setTimeLimit(max_execution_time); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 966d818d675..a7b89a667e7 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -1,12 +1,12 @@ #pragma once -#include -#include #include +#include #include #include -#include +#include #include +#include #include @@ -19,6 +19,7 @@ namespace DB { +struct KafkaSettings; class ReadFromStorageKafka; class StorageSystemKafkaConsumers; class ThreadStatus; @@ -80,7 +81,7 @@ public: const auto & getFormatName() const { return format_name; } - StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; } + StreamingHandleErrorMode getStreamingHandleErrorMode() const; struct SafeConsumers { diff --git a/src/Storages/Kafka/StorageKafka2.cpp b/src/Storages/Kafka/StorageKafka2.cpp index 0d8702d9e47..f583e73f47d 100644 --- a/src/Storages/Kafka/StorageKafka2.cpp +++ b/src/Storages/Kafka/StorageKafka2.cpp @@ -80,6 +80,28 @@ namespace Setting extern const SettingsMilliseconds stream_poll_timeout_ms; } +namespace KafkaSetting +{ + extern const KafkaSettingsUInt64 input_format_allow_errors_num; + extern const KafkaSettingsFloat input_format_allow_errors_ratio; + extern const KafkaSettingsString kafka_broker_list; + extern const KafkaSettingsString kafka_client_id; + extern const KafkaSettingsMilliseconds kafka_flush_interval_ms; + extern const KafkaSettingsString kafka_format; + extern const KafkaSettingsString kafka_group_name; + extern const KafkaSettingsStreamingHandleErrorMode kafka_handle_error_mode; + extern const KafkaSettingsString kafka_keeper_path; + extern const KafkaSettingsUInt64 kafka_max_block_size; + extern const KafkaSettingsUInt64 kafka_max_rows_per_message; + extern const KafkaSettingsUInt64 kafka_num_consumers; + extern const 
KafkaSettingsUInt64 kafka_poll_max_batch_size; + extern const KafkaSettingsMilliseconds kafka_poll_timeout_ms; + extern const KafkaSettingsString kafka_replica_name; + extern const KafkaSettingsString kafka_schema; + extern const KafkaSettingsBool kafka_thread_per_consumer; + extern const KafkaSettingsString kafka_topic_list; +} + namespace fs = std::filesystem; namespace ErrorCodes @@ -108,41 +130,41 @@ StorageKafka2::StorageKafka2( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , keeper(getContext()->getZooKeeper()) - , keeper_path(kafka_settings_->kafka_keeper_path.value) - , replica_path(keeper_path + "/replicas/" + kafka_settings_->kafka_replica_name.value) + , keeper_path((*kafka_settings_)[KafkaSetting::kafka_keeper_path].value) + , replica_path(keeper_path + "/replicas/" + (*kafka_settings_)[KafkaSetting::kafka_replica_name].value) , kafka_settings(std::move(kafka_settings_)) , macros_info{.table_id = table_id_} - , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) - , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) - , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) + , topics(StorageKafkaUtils::parseTopics(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_topic_list].value, macros_info))) + , brokers(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_broker_list].value, macros_info)) + , group(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_group_name].value, macros_info)) , client_id( - kafka_settings->kafka_client_id.value.empty() + (*kafka_settings)[KafkaSetting::kafka_client_id].value.empty() ? 
StorageKafkaUtils::getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) - , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) - , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) - , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) - , num_consumers(kafka_settings->kafka_num_consumers.value) + : getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_client_id].value, macros_info)) + , format_name(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_format].value)) + , max_rows_per_message((*kafka_settings)[KafkaSetting::kafka_max_rows_per_message].value) + , schema_name(getContext()->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_schema].value, macros_info)) + , num_consumers((*kafka_settings)[KafkaSetting::kafka_num_consumers].value) , log(getLogger("StorageKafka2 (" + table_id_.getNameForLogs() + ")")) , semaphore(0, static_cast(num_consumers)) , settings_adjustments(StorageKafkaUtils::createSettingsAdjustments(*kafka_settings, schema_name)) - , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) + , thread_per_consumer((*kafka_settings)[KafkaSetting::kafka_thread_per_consumer].value) , collection_name(collection_name_) , active_node_identifier(toString(ServerUUID::get())) { - if (kafka_settings->kafka_num_consumers > 1 && !thread_per_consumer) + if ((*kafka_settings)[KafkaSetting::kafka_num_consumers] > 1 && !thread_per_consumer) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "With multiple consumers, it is required to use `kafka_thread_per_consumer` setting"); - if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM) + if ((*kafka_settings)[KafkaSetting::kafka_handle_error_mode] == StreamingHandleErrorMode::STREAM) { - kafka_settings->input_format_allow_errors_num = 0; - 
kafka_settings->input_format_allow_errors_ratio = 0; + (*kafka_settings)[KafkaSetting::input_format_allow_errors_num] = 0; + (*kafka_settings)[KafkaSetting::input_format_allow_errors_ratio] = 0; } StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(StorageKafkaUtils::createVirtuals(kafka_settings->kafka_handle_error_mode)); + setVirtuals(StorageKafkaUtils::createVirtuals((*kafka_settings)[KafkaSetting::kafka_handle_error_mode])); auto task_count = thread_per_consumer ? num_consumers : 1; for (size_t i = 0; i < task_count; ++i) @@ -161,6 +183,8 @@ StorageKafka2::StorageKafka2( activating_task->deactivate(); } +StorageKafka2::~StorageKafka2() = default; + void StorageKafka2::partialShutdown() { // This is called in a background task within a catch block, thus this function shouldn't throw @@ -341,6 +365,11 @@ Pipe StorageKafka2::read( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Direct read from the new Kafka storage is not implemented"); } +StreamingHandleErrorMode StorageKafka2::getHandleKafkaErrorMode() const +{ + return (*kafka_settings)[KafkaSetting::kafka_handle_error_mode]; +} + SinkToStoragePtr StorageKafka2::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) @@ -450,13 +479,13 @@ cppkafka::Configuration StorageKafka2::getProducerConfiguration() size_t StorageKafka2::getMaxBlockSize() const { - return kafka_settings->kafka_max_block_size.changed ? kafka_settings->kafka_max_block_size.value + return (*kafka_settings)[KafkaSetting::kafka_max_block_size].changed ? (*kafka_settings)[KafkaSetting::kafka_max_block_size].value : (getContext()->getSettingsRef()[Setting::max_insert_block_size].value / num_consumers); } size_t StorageKafka2::getPollMaxBatchSize() const { - size_t batch_size = kafka_settings->kafka_poll_max_batch_size.changed ? 
kafka_settings->kafka_poll_max_batch_size.value + size_t batch_size = (*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].changed ? (*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].value : getContext()->getSettingsRef()[Setting::max_block_size].value; return std::min(batch_size, getMaxBlockSize()); @@ -464,7 +493,7 @@ size_t StorageKafka2::getPollMaxBatchSize() const size_t StorageKafka2::getPollTimeoutMillisecond() const { - return kafka_settings->kafka_poll_timeout_ms.changed ? kafka_settings->kafka_poll_timeout_ms.totalMilliseconds() + return (*kafka_settings)[KafkaSetting::kafka_poll_timeout_ms].changed ? (*kafka_settings)[KafkaSetting::kafka_poll_timeout_ms].totalMilliseconds() : getContext()->getSettingsRef()[Setting::stream_poll_timeout_ms].totalMilliseconds(); } @@ -732,7 +761,7 @@ StorageKafka2::lockTopicPartitions(zkutil::ZooKeeper & keeper_to_use, const Topi const auto lock_file_path = String(topic_partition_path / lock_file_name); LOG_TRACE(log, "Creating locking ops for: {}", lock_file_path); ops.push_back(zkutil::makeCreateRequest(topic_partition_path, "", zkutil::CreateMode::Persistent, ignore_if_exists)); - ops.push_back(zkutil::makeCreateRequest(lock_file_path, kafka_settings->kafka_replica_name.value, zkutil::CreateMode::Ephemeral)); + ops.push_back(zkutil::makeCreateRequest(lock_file_path, (*kafka_settings)[KafkaSetting::kafka_replica_name].value, zkutil::CreateMode::Ephemeral)); } Coordination::Responses responses; @@ -815,7 +844,7 @@ StorageKafka2::PolledBatchInfo StorageKafka2::pollConsumer( // otherwise external iteration will reuse that and logic will became even more fuzzy MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); - auto put_error_to_stream = kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM; + auto put_error_to_stream = (*kafka_settings)[KafkaSetting::kafka_handle_error_mode] == StreamingHandleErrorMode::STREAM; EmptyReadBuffer empty_buf; auto input_format = 
FormatFactory::instance().getInput( @@ -858,8 +887,8 @@ StorageKafka2::PolledBatchInfo StorageKafka2::pollConsumer( StreamingFormatExecutor executor(non_virtual_header, input_format, std::move(on_error)); - Poco::Timespan max_execution_time = kafka_settings->kafka_flush_interval_ms.changed - ? kafka_settings->kafka_flush_interval_ms + Poco::Timespan max_execution_time = (*kafka_settings)[KafkaSetting::kafka_flush_interval_ms].changed + ? (*kafka_settings)[KafkaSetting::kafka_flush_interval_ms] : getContext()->getSettingsRef()[Setting::stream_flush_interval_ms]; const auto check_time_limit = [&max_execution_time, &total_stopwatch]() diff --git a/src/Storages/Kafka/StorageKafka2.h b/src/Storages/Kafka/StorageKafka2.h index f85fedb316a..062cb742b74 100644 --- a/src/Storages/Kafka/StorageKafka2.h +++ b/src/Storages/Kafka/StorageKafka2.h @@ -2,10 +2,10 @@ #include #include +#include #include #include #include -#include #include #include #include @@ -29,6 +29,7 @@ class Configuration; namespace DB { +struct KafkaSettings; template struct KafkaInterceptors; @@ -63,6 +64,8 @@ public: std::unique_ptr kafka_settings_, const String & collection_name_); + ~StorageKafka2() override; + std::string getName() const override { return "Kafka"; } bool noPushingToViews() const override { return true; } @@ -89,7 +92,7 @@ public: const auto & getFormatName() const { return format_name; } - StreamingHandleErrorMode getHandleKafkaErrorMode() const { return kafka_settings->kafka_handle_error_mode; } + StreamingHandleErrorMode getHandleKafkaErrorMode() const; private: using TopicPartition = KafkaConsumer2::TopicPartition; diff --git a/src/Storages/Kafka/StorageKafkaUtils.cpp b/src/Storages/Kafka/StorageKafkaUtils.cpp index 19a6dbc3a7f..dd954d6a7c2 100644 --- a/src/Storages/Kafka/StorageKafkaUtils.cpp +++ b/src/Storages/Kafka/StorageKafkaUtils.cpp @@ -53,6 +53,32 @@ namespace Setting extern const SettingsBool kafka_disable_num_consumers_limit; } +namespace KafkaSetting +{ + extern const 
KafkaSettingsUInt64 input_format_allow_errors_num; + extern const KafkaSettingsFloat input_format_allow_errors_ratio; + extern const KafkaSettingsBool input_format_skip_unknown_fields; + extern const KafkaSettingsString kafka_broker_list; + extern const KafkaSettingsString kafka_client_id; + extern const KafkaSettingsBool kafka_commit_every_batch; + extern const KafkaSettingsBool kafka_commit_on_select; + extern const KafkaSettingsMilliseconds kafka_flush_interval_ms; + extern const KafkaSettingsString kafka_format; + extern const KafkaSettingsString kafka_group_name; + extern const KafkaSettingsStreamingHandleErrorMode kafka_handle_error_mode; + extern const KafkaSettingsString kafka_keeper_path; + extern const KafkaSettingsUInt64 kafka_max_block_size; + extern const KafkaSettingsUInt64 kafka_max_rows_per_message; + extern const KafkaSettingsUInt64 kafka_num_consumers; + extern const KafkaSettingsUInt64 kafka_poll_max_batch_size; + extern const KafkaSettingsMilliseconds kafka_poll_timeout_ms; + extern const KafkaSettingsString kafka_replica_name; + extern const KafkaSettingsString kafka_schema; + extern const KafkaSettingsUInt64 kafka_skip_broken_messages; + extern const KafkaSettingsBool kafka_thread_per_consumer; + extern const KafkaSettingsString kafka_topic_list; +} + using namespace std::chrono_literals; namespace ErrorCodes @@ -75,12 +101,7 @@ void registerStorageKafka(StorageFactory & factory) String collection_name; if (auto named_collection = tryGetNamedCollectionWithOverrides(args.engine_args, args.getLocalContext())) { - for (const auto & setting : kafka_settings->all()) - { - const auto & setting_name = setting.getName(); - if (named_collection->has(setting_name)) - kafka_settings->set(setting_name, named_collection->get(setting_name)); - } + kafka_settings->loadFromNamedCollection(named_collection); collection_name = assert_cast(args.engine_args[0].get())->name(); } @@ -92,7 +113,7 @@ void registerStorageKafka(StorageFactory & factory) // Check 
arguments and settings #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME, EVAL) \ /* One of the four required arguments is not specified */ \ - if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && !kafka_settings->PAR_NAME.changed) \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && !(*kafka_settings)[KafkaSetting::PAR_NAME].changed) \ { \ throw Exception( \ ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, \ @@ -103,7 +124,7 @@ void registerStorageKafka(StorageFactory & factory) if (args_count >= (ARG_NUM)) \ { \ /* The same argument is given in two places */ \ - if (has_settings && kafka_settings->PAR_NAME.changed) \ + if (has_settings && (*kafka_settings)[KafkaSetting::PAR_NAME].changed) \ throw Exception( \ ErrorCodes::BAD_ARGUMENTS, \ "The argument №{} of storage Kafka " \ @@ -121,7 +142,7 @@ void registerStorageKafka(StorageFactory & factory) engine_args[(ARG_NUM)-1] \ = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[(ARG_NUM)-1], args.getLocalContext()); \ } \ - kafka_settings->PAR_NAME = engine_args[(ARG_NUM)-1]->as().value; \ + (*kafka_settings)[KafkaSetting::PAR_NAME] = engine_args[(ARG_NUM)-1]->as().value; \ } /** Arguments of engine is following: @@ -145,7 +166,6 @@ void registerStorageKafka(StorageFactory & factory) CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list, 1) CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name, 2) CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format, 2) - CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter, 2) CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema, 2) CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers, 0) CHECK_KAFKA_STORAGE_ARGUMENT(8, kafka_max_block_size, 0) @@ -162,7 +182,7 @@ void registerStorageKafka(StorageFactory & factory) #undef CHECK_KAFKA_STORAGE_ARGUMENT - auto num_consumers = kafka_settings->kafka_num_consumers.value; + auto num_consumers = (*kafka_settings)[KafkaSetting::kafka_num_consumers].value; auto max_consumers = std::max(getNumberOfCPUCoresToUse(), 16); if 
(!args.getLocalContext()->getSettingsRef()[Setting::kafka_disable_num_consumers_limit] && num_consumers > max_consumers) @@ -185,12 +205,12 @@ void registerStorageKafka(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be lower than 1"); } - if (kafka_settings->kafka_max_block_size.changed && kafka_settings->kafka_max_block_size.value < 1) + if ((*kafka_settings)[KafkaSetting::kafka_max_block_size].changed && (*kafka_settings)[KafkaSetting::kafka_max_block_size].value < 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_max_block_size can not be lower than 1"); } - if (kafka_settings->kafka_poll_max_batch_size.changed && kafka_settings->kafka_poll_max_batch_size.value < 1) + if ((*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].changed && (*kafka_settings)[KafkaSetting::kafka_poll_max_batch_size].value < 1) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); } @@ -211,8 +231,8 @@ void registerStorageKafka(StorageFactory & factory) "See https://clickhouse.com/docs/en/engines/table-engines/integrations/kafka/#configuration"); } - const auto has_keeper_path = kafka_settings->kafka_keeper_path.changed && !kafka_settings->kafka_keeper_path.value.empty(); - const auto has_replica_name = kafka_settings->kafka_replica_name.changed && !kafka_settings->kafka_replica_name.value.empty(); + const auto has_keeper_path = (*kafka_settings)[KafkaSetting::kafka_keeper_path].changed && !(*kafka_settings)[KafkaSetting::kafka_keeper_path].value.empty(); + const auto has_replica_name = (*kafka_settings)[KafkaSetting::kafka_replica_name].changed && !(*kafka_settings)[KafkaSetting::kafka_replica_name].value.empty(); if (!has_keeper_path && !has_replica_name) return std::make_shared( @@ -248,18 +268,18 @@ void registerStorageKafka(StorageFactory & factory) info.table_id = args.table_id; // We could probably unfold UUID here too, but let's keep it similar to ReplicatedMergeTree, which 
doesn't do the unfolding. info.table_id.uuid = UUIDHelpers::Nil; - kafka_settings->kafka_keeper_path.value = context->getMacros()->expand(kafka_settings->kafka_keeper_path.value, info); + (*kafka_settings)[KafkaSetting::kafka_keeper_path].value = context->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_keeper_path].value, info); info.level = 0; - kafka_settings->kafka_replica_name.value = context->getMacros()->expand(kafka_settings->kafka_replica_name.value, info); + (*kafka_settings)[KafkaSetting::kafka_replica_name].value = context->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_replica_name].value, info); } auto * settings_query = args.storage_def->settings; chassert(has_settings && "Unexpected settings query in StorageKafka"); - settings_query->changes.setSetting("kafka_keeper_path", kafka_settings->kafka_keeper_path.value); - settings_query->changes.setSetting("kafka_replica_name", kafka_settings->kafka_replica_name.value); + settings_query->changes.setSetting("kafka_keeper_path", (*kafka_settings)[KafkaSetting::kafka_keeper_path].value); + settings_query->changes.setSetting("kafka_replica_name", (*kafka_settings)[KafkaSetting::kafka_replica_name].value); // Expand other macros (such as {replica}). We do not expand them on previous step to make possible copying metadata files between replicas. // Disable expanding {shard} macro, because it can lead to incorrect behavior and it doesn't make sense to shard Kafka tables. 
@@ -273,11 +293,11 @@ void registerStorageKafka(StorageFactory & factory) } if (!allow_uuid_macro) info.table_id.uuid = UUIDHelpers::Nil; - kafka_settings->kafka_keeper_path.value = context->getMacros()->expand(kafka_settings->kafka_keeper_path.value, info); + (*kafka_settings)[KafkaSetting::kafka_keeper_path].value = context->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_keeper_path].value, info); info.level = 0; info.table_id.uuid = UUIDHelpers::Nil; - kafka_settings->kafka_replica_name.value = context->getMacros()->expand(kafka_settings->kafka_replica_name.value, info); + (*kafka_settings)[KafkaSetting::kafka_replica_name].value = context->getMacros()->expand((*kafka_settings)[KafkaSetting::kafka_replica_name].value, info); return std::make_shared( args.table_id, args.getContext(), args.columns, args.comment, std::move(kafka_settings), collection_name); @@ -369,31 +389,27 @@ SettingsChanges createSettingsAdjustments(KafkaSettings & kafka_settings, const { SettingsChanges result; // Needed for backward compatibility - if (!kafka_settings.input_format_skip_unknown_fields.changed) + if (!kafka_settings[KafkaSetting::input_format_skip_unknown_fields].changed) { // Always skip unknown fields regardless of the context (JSON or TSKV) - kafka_settings.input_format_skip_unknown_fields = true; + kafka_settings[KafkaSetting::input_format_skip_unknown_fields] = true; } - if (!kafka_settings.input_format_allow_errors_ratio.changed) + if (!kafka_settings[KafkaSetting::input_format_allow_errors_ratio].changed) { - kafka_settings.input_format_allow_errors_ratio = 0.; + kafka_settings[KafkaSetting::input_format_allow_errors_ratio] = 0.; } - if (!kafka_settings.input_format_allow_errors_num.changed) + if (!kafka_settings[KafkaSetting::input_format_allow_errors_num].changed) { - kafka_settings.input_format_allow_errors_num = kafka_settings.kafka_skip_broken_messages.value; + kafka_settings[KafkaSetting::input_format_allow_errors_num] = 
kafka_settings[KafkaSetting::kafka_skip_broken_messages].value; } if (!schema_name.empty()) result.emplace_back("format_schema", schema_name); - for (const auto & setting : kafka_settings) - { - const auto & name = setting.getName(); - if (name.find("kafka_") == std::string::npos) - result.emplace_back(name, setting.getValue()); - } + auto kafka_format_settings = kafka_settings.getFormatSettings(); + result.insert(result.end(), kafka_format_settings.begin(), kafka_format_settings.end()); return result; } diff --git a/src/Storages/Kafka/StorageKafkaUtils.h b/src/Storages/Kafka/StorageKafkaUtils.h index cc956dde78d..5f681e94077 100644 --- a/src/Storages/Kafka/StorageKafkaUtils.h +++ b/src/Storages/Kafka/StorageKafkaUtils.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -9,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MaterializedView/RefreshSettings.cpp b/src/Storages/MaterializedView/RefreshSettings.cpp index 079b35d6152..6e130affb78 100644 --- a/src/Storages/MaterializedView/RefreshSettings.cpp +++ b/src/Storages/MaterializedView/RefreshSettings.cpp @@ -5,11 +5,11 @@ namespace DB { -#define LIST_OF_REFRESH_SETTINGS(M, ALIAS) \ - M(Int64, refresh_retries, 2, "How many times to retry refresh query if it fails. If all attempts fail, wait for the next refresh time according to schedule. 0 to disable retries. -1 for infinite retries.", 0) \ - M(UInt64, refresh_retry_initial_backoff_ms, 100, "Delay before the first retry if refresh query fails (if refresh_retries setting is not zero). 
Each subsequent retry doubles the delay, up to refresh_retry_max_backoff_ms.", 0) \ - M(UInt64, refresh_retry_max_backoff_ms, 60'000, "Limit on the exponential growth of delay between refresh attempts, if they keep failing and refresh_retries is positive.", 0) \ - M(Bool, all_replicas, /* do not change or existing tables will break */ false, "If the materialized view is in a Replicated database, and APPEND is enabled, this flag controls whether all replicas or one replica will refresh.", 0) \ +#define LIST_OF_REFRESH_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Int64, refresh_retries, 2, "How many times to retry refresh query if it fails. If all attempts fail, wait for the next refresh time according to schedule. 0 to disable retries. -1 for infinite retries.", 0) \ + DECLARE(UInt64, refresh_retry_initial_backoff_ms, 100, "Delay before the first retry if refresh query fails (if refresh_retries setting is not zero). Each subsequent retry doubles the delay, up to refresh_retry_max_backoff_ms.", 0) \ + DECLARE(UInt64, refresh_retry_max_backoff_ms, 60'000, "Limit on the exponential growth of delay between refresh attempts, if they keep failing and refresh_retries is positive.", 0) \ + DECLARE(Bool, all_replicas, /* do not change or existing tables will break */ false, "If the materialized view is in a Replicated database, and APPEND is enabled, this flag controls whether all replicas or one replica will refresh.", 0) \ DECLARE_SETTINGS_TRAITS(RefreshSettingsTraits, LIST_OF_REFRESH_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(RefreshSettingsTraits, LIST_OF_REFRESH_SETTINGS) diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp index 30ae4e12668..032d673a9e3 100644 --- a/src/Storages/MemorySettings.cpp +++ b/src/Storages/MemorySettings.cpp @@ -1,6 +1,8 @@ -#include +#include +#include #include #include +#include #include @@ -13,7 +15,51 @@ namespace ErrorCodes extern const int SETTING_CONSTRAINT_VIOLATION; } -IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, 
MEMORY_SETTINGS) +#define MEMORY_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, compress, false, "Compress data in memory", 0) \ + DECLARE(UInt64, min_rows_to_keep, 0, "Minimum block size (in rows) to retain in Memory table buffer.", 0) \ + DECLARE(UInt64, max_rows_to_keep, 0, "Maximum block size (in rows) to retain in Memory table buffer.", 0) \ + DECLARE(UInt64, min_bytes_to_keep, 0, "Minimum block size (in bytes) to retain in Memory table buffer.", 0) \ + DECLARE(UInt64, max_bytes_to_keep, 0, "Maximum block size (in bytes) to retain in Memory table buffer.", 0) \ + +DECLARE_SETTINGS_TRAITS(MemorySettingsTraits, MEMORY_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(MemorySettingsTraits, MEMORY_SETTINGS) + + +struct MemorySettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) MemorySettings##TYPE NAME = &MemorySettingsImpl ::NAME; + +namespace MemorySetting +{ +MEMORY_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +MemorySettings::MemorySettings() : impl(std::make_unique()) +{ +} + +MemorySettings::MemorySettings(const MemorySettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +MemorySettings::MemorySettings(MemorySettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +MemorySettings::~MemorySettings() = default; + +MemorySettings & MemorySettings::operator=(MemorySettings && settings) noexcept +{ + *impl = std::move(*settings.impl); + return *this; +} + +MEMORY_SETTINGS_SUPPORTED_TYPES(MemorySettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) void MemorySettings::loadFromQuery(ASTStorage & storage_def) { @@ -21,7 +67,7 @@ void MemorySettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { @@ -36,7 +82,7 @@ ASTPtr MemorySettings::getSettingsChangesQuery() { auto settings_ast = std::make_shared(); 
settings_ast->is_standalone = false; - for (const auto & change : changes()) + for (const auto & change : impl->changes()) settings_ast->changes.push_back(change); return settings_ast; @@ -44,19 +90,25 @@ ASTPtr MemorySettings::getSettingsChangesQuery() void MemorySettings::sanityCheck() const { - if (min_bytes_to_keep > max_bytes_to_keep) - throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, - "Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", - min_bytes_to_keep, - max_bytes_to_keep); + if (impl->min_bytes_to_keep > impl->max_bytes_to_keep) + throw Exception( + ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", + impl->min_bytes_to_keep, + impl->max_bytes_to_keep); - if (min_rows_to_keep > max_rows_to_keep) - throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, - "Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}", - min_rows_to_keep, - max_rows_to_keep); + if (impl->min_rows_to_keep > impl->max_rows_to_keep) + throw Exception( + ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. 
`min_rows_to_keep`: {}, `max_rows_to_keep`: {}", + impl->min_rows_to_keep, + impl->max_rows_to_keep); } +void MemorySettings::applyChanges(const DB::SettingsChanges & changes) +{ + impl->applyChanges(changes); +} } diff --git a/src/Storages/MemorySettings.h b/src/Storages/MemorySettings.h index f650746c4b2..d62f284e421 100644 --- a/src/Storages/MemorySettings.h +++ b/src/Storages/MemorySettings.h @@ -1,32 +1,46 @@ #pragma once -#include -#include - +#include +#include namespace DB { class ASTStorage; +struct MemorySettingsImpl; +class IAST; +using ASTPtr = std::shared_ptr; -#define MEMORY_SETTINGS(M, ALIAS) \ - M(Bool, compress, false, "Compress data in memory", 0) \ - M(UInt64, min_rows_to_keep, 0, "Minimum block size (in rows) to retain in Memory table buffer.", 0) \ - M(UInt64, max_rows_to_keep, 0, "Maximum block size (in rows) to retain in Memory table buffer.", 0) \ - M(UInt64, min_bytes_to_keep, 0, "Minimum block size (in bytes) to retain in Memory table buffer.", 0) \ - M(UInt64, max_bytes_to_keep, 0, "Maximum block size (in bytes) to retain in Memory table buffer.", 0) \ +class SettingsChanges; -DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) +/// List of available types supported in MemorySettings object +#define MEMORY_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, UInt64) +MEMORY_SETTINGS_SUPPORTED_TYPES(MemorySettings, DECLARE_SETTING_TRAIT) /** Settings for the Memory engine. * Could be loaded from a CREATE TABLE query (SETTINGS clause). 
*/ -struct MemorySettings : public BaseSettings +struct MemorySettings { + MemorySettings(); + MemorySettings(const MemorySettings & settings); + MemorySettings(MemorySettings && settings) noexcept; + ~MemorySettings(); + + MemorySettings & operator=(MemorySettings && settings) noexcept; + + MEMORY_SETTINGS_SUPPORTED_TYPES(MemorySettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); ASTPtr getSettingsChangesQuery(); void sanityCheck() const; + void applyChanges(const SettingsChanges & changes); + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 56d7133dfc3..fa6640409e5 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -335,6 +335,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() future_merged_part, task_context); + storage.writePartLog( + PartLogElement::MERGE_PARTS_START, {}, 0, + entry.new_part_name, part, parts, merge_mutate_entry.get(), {}); + transaction_ptr = std::make_unique(storage, NO_TRANSACTION_RAW); merge_task = storage.merger_mutator.mergePartsToTemporaryPart( @@ -352,7 +356,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() storage.merging_params, NO_TRANSACTION_PTR); - /// Adjust priority for (auto & item : future_merged_part->parts) priority.value += item->getBytesOnDisk(); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index be44177847c..f7b52d2216d 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -92,6 +92,10 @@ void MergePlainMergeTreeTask::prepare() future_part, task_context); + storage.writePartLog( + PartLogElement::MERGE_PARTS_START, {}, 0, + future_part->name, new_part, future_part->parts, merge_list_entry.get(), {}); + 
write_part_log = [this] (const ExecutionStatus & execution_status) { auto profile_counters_snapshot = std::make_shared(profile_counters.getPartiallyAtomicSnapshot()); @@ -121,19 +125,19 @@ void MergePlainMergeTreeTask::prepare() }; merge_task = storage.merger_mutator.mergePartsToTemporaryPart( - future_part, - metadata_snapshot, - merge_list_entry.get(), - {} /* projection_merge_list_element */, - table_lock_holder, - time(nullptr), - task_context, - merge_mutate_entry->tagger->reserved_space, - deduplicate, - deduplicate_by_columns, - cleanup, - storage.merging_params, - txn); + future_part, + metadata_snapshot, + merge_list_entry.get(), + {} /* projection_merge_list_element */, + table_lock_holder, + time(nullptr), + task_context, + merge_mutate_entry->tagger->reserved_space, + deduplicate, + deduplicate_by_columns, + cleanup, + storage.merging_params, + txn); } diff --git a/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.cpp b/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.cpp new file mode 100644 index 00000000000..b0071f9f7c4 --- /dev/null +++ b/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.cpp @@ -0,0 +1,94 @@ +#include +#include + +#include +#include + +#include + + +namespace DB +{ + +void registerTrivialMergeSelector(MergeSelectorFactory & factory) +{ + factory.registerPublicSelector("Trivial", MergeSelectorAlgorithm::TRIVIAL, [](const std::any &) + { + return std::make_shared(); + }); +} + +TrivialMergeSelector::PartsRange TrivialMergeSelector::select( + const PartsRanges & parts_ranges, + size_t max_total_size_to_merge) +{ + size_t num_partitions = parts_ranges.size(); + if (num_partitions == 0) + return {}; + + /// Sort partitions from the largest to smallest in the number of parts. 
+ std::vector sorted_partition_indices; + sorted_partition_indices.reserve(num_partitions); + for (size_t i = 0; i < num_partitions; ++i) + if (parts_ranges[i].size() >= settings.num_parts_to_merge) + sorted_partition_indices.emplace_back(i); + + if (sorted_partition_indices.empty()) + return {}; + + std::sort(sorted_partition_indices.begin(), sorted_partition_indices.end(), + [&](size_t i, size_t j){ return parts_ranges[i].size() > parts_ranges[j].size(); }); + + size_t partition_idx = 0; + size_t left = 0; + size_t right = 0; + + std::vector candidates; + while (candidates.size() < settings.num_ranges_to_choose) + { + const PartsRange & partition = parts_ranges[partition_idx]; + + if (1 + right - left == settings.num_parts_to_merge) + { + ++right; + + size_t total_size = 0; + for (size_t i = left; i < right; ++i) + total_size += partition[i].size; + + if (!max_total_size_to_merge || total_size <= max_total_size_to_merge) + { + candidates.emplace_back(partition.data() + left, partition.data() + right); + if (candidates.size() == settings.num_ranges_to_choose) + break; + } + + left = right; + } + + if (partition.size() - left < settings.num_parts_to_merge) + { + ++partition_idx; + if (partition_idx == sorted_partition_indices.size()) + break; + + left = 0; + right = 0; + } + + ++right; + + if (partition[right].level < partition[left].level) + left = right; + } + + if (candidates.empty()) + return {}; + + if (candidates.size() == 1) + return candidates[0]; + + return candidates[thread_local_rng() % candidates.size()]; +} + +} diff --git a/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.h b/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.h new file mode 100644 index 00000000000..6d989aea0fb --- /dev/null +++ b/src/Storages/MergeTree/MergeSelectors/TrivialMergeSelector.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +namespace DB +{ + +/** Go through partitions starting from the largest (in the number of parts). 
+ * Go through parts from left to right. + * Find the first range of N parts where their level is not decreasing. + * Then continue finding these ranges and find up to M of these ranges. + * Choose a random one from them. + */ +class TrivialMergeSelector : public IMergeSelector +{ +public: + struct Settings + { + size_t num_parts_to_merge = 10; + size_t num_ranges_to_choose = 100; + }; + + PartsRange select( + const PartsRanges & parts_ranges, + size_t max_total_size_to_merge) override; + +private: + const Settings settings; +}; + +} diff --git a/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp index 61f941adc36..6a3c1ef4b2b 100644 --- a/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp +++ b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp @@ -7,6 +7,7 @@ namespace DB void registerSimpleMergeSelector(MergeSelectorFactory & factory); void registerStochasticSimpleMergeSelector(MergeSelectorFactory & factory); +void registerTrivialMergeSelector(MergeSelectorFactory & factory); void registerAllMergeSelector(MergeSelectorFactory & factory); void registerTTLDeleteMergeSelector(MergeSelectorFactory & factory); void registerTTLRecompressMergeSelector(MergeSelectorFactory & factory); @@ -17,6 +18,7 @@ void registerMergeSelectors() registerSimpleMergeSelector(factory); registerStochasticSimpleMergeSelector(factory); + registerTrivialMergeSelector(factory); registerAllMergeSelector(factory); registerTTLDeleteMergeSelector(factory); registerTTLRecompressMergeSelector(factory); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index c171acb8089..e3ace824115 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -6,11 +6,8 @@ #include #include -#include #include #include -#include -#include #include #include #include @@ -20,10 +17,8 @@ #include #include #include -#include #include 
#include -#include #include #include #include @@ -34,9 +29,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -48,9 +40,22 @@ #include #include +#ifndef NDEBUG + #include +#endif + +#ifdef CLICKHOUSE_CLOUD + #include + #include + #include + #include +#endif + + namespace ProfileEvents { extern const Event Merge; + extern const Event MergeSourceParts; extern const Event MergedColumns; extern const Event GatheredColumns; extern const Event MergeTotalMilliseconds; @@ -302,6 +307,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const { ProfileEvents::increment(ProfileEvents::Merge); + ProfileEvents::increment(ProfileEvents::MergeSourceParts, global_ctx->future_part->parts.size()); String local_tmp_prefix; if (global_ctx->need_prefix) @@ -1392,7 +1398,7 @@ bool MergeTask::execute() } -/// Apply merge strategy (Ordinary, Colapsing, Aggregating, etc) to the stream +/// Apply merge strategy (Ordinary, Collapsing, Aggregating, etc) to the stream class MergePartsStep : public ITransformingStep { public: @@ -1428,7 +1434,7 @@ public: /// that is going in insertion order. ProcessorPtr merged_transform; - const auto &header = pipeline.getHeader(); + const auto & header = pipeline.getHeader(); const auto input_streams_count = pipeline.getNumStreams(); WriteBuffer * rows_sources_write_buf = nullptr; @@ -1697,7 +1703,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const sort_description, partition_key_columns, global_ctx->merging_params, - (is_vertical_merge ? RowsSourcesTemporaryFile::FILE_ID : ""), /// rows_sources temporaty file is used only for vertical merge + (is_vertical_merge ? 
RowsSourcesTemporaryFile::FILE_ID : ""), /// rows_sources' temporary file is used only for vertical merge (*data_settings)[MergeTreeSetting::merge_max_block_size], (*data_settings)[MergeTreeSetting::merge_max_block_size_bytes], ctx->blocks_are_granules_size, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5ed8b062b2f..8611681a976 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2666,6 +2666,10 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa for (const auto & part : parts) { part_log_elem.partition_id = part->info.partition_id; + { + WriteBufferFromString out(part_log_elem.partition); + part->partition.serializeText(part->storage, out, {}); + } part_log_elem.part_name = part->name; part_log_elem.bytes_compressed_on_disk = part->getBytesOnDisk(); part_log_elem.bytes_uncompressed = part->getBytesUncompressedOnDisk(); @@ -7915,7 +7919,8 @@ try part_log_elem.event_type = type; - if (part_log_elem.event_type == PartLogElement::MERGE_PARTS) + if (part_log_elem.event_type == PartLogElement::MERGE_PARTS + || part_log_elem.event_type == PartLogElement::MERGE_PARTS_START) { if (merge_entry) { @@ -7940,6 +7945,20 @@ try part_log_elem.table_name = table_id.table_name; part_log_elem.table_uuid = table_id.uuid; part_log_elem.partition_id = MergeTreePartInfo::fromPartName(new_part_name, format_version).partition_id; + + { + const DataPart * result_or_source_data_part = nullptr; + if (result_part) + result_or_source_data_part = result_part.get(); + else if (!source_parts.empty()) + result_or_source_data_part = source_parts.at(0).get(); + if (result_or_source_data_part) + { + WriteBufferFromString out(part_log_elem.partition); + result_or_source_data_part->partition.serializeText(*this, out, {}); + } + } + part_log_elem.part_name = new_part_name; if (result_part) @@ -7969,10 +7988,6 @@ try { part_log_elem.profile_counters = 
profile_counters; } - else - { - LOG_WARNING(log, "Profile counters are not set"); - } part_log->add(std::move(part_log_elem)); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 13918ae8e91..d7305045a56 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1022,11 +1022,7 @@ size_t MergeTreeDataSelectExecutor::roundRowsOrBytesToMarks( /// Same as roundRowsOrBytesToMarks() but do not return more then max_marks size_t MergeTreeDataSelectExecutor::minMarksForConcurrentRead( - size_t rows_setting, - size_t bytes_setting, - size_t rows_granularity, - size_t bytes_granularity, - size_t max_marks) + size_t rows_setting, size_t bytes_setting, size_t rows_granularity, size_t bytes_granularity, size_t min_marks, size_t max_marks) { size_t marks = 1; @@ -1035,18 +1031,17 @@ size_t MergeTreeDataSelectExecutor::minMarksForConcurrentRead( else if (rows_setting) marks = (rows_setting + rows_granularity - 1) / rows_granularity; - if (bytes_granularity == 0) - return marks; - - /// Overflow - if (bytes_setting + bytes_granularity <= bytes_setting) /// overflow - return max_marks; - if (bytes_setting) - return std::max(marks, (bytes_setting + bytes_granularity - 1) / bytes_granularity); - return marks; + if (bytes_granularity) + { + /// Overflow + if (bytes_setting + bytes_granularity <= bytes_setting) /// overflow + marks = max_marks; + else if (bytes_setting) + marks = std::max(marks, (bytes_setting + bytes_granularity - 1) / bytes_granularity); + } + return std::max(marks, min_marks); } - /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. /// If @exact_ranges is not null, fill it with ranges containing marks of fully matched records. 
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 70536b7aa54..d16d9243c14 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -153,11 +153,7 @@ public: /// The same as roundRowsOrBytesToMarks, but return no more than max_marks. static size_t minMarksForConcurrentRead( - size_t rows_setting, - size_t bytes_setting, - size_t rows_granularity, - size_t bytes_granularity, - size_t max_marks); + size_t rows_setting, size_t bytes_setting, size_t rows_granularity, size_t bytes_granularity, size_t min_marks, size_t max_marks); /// If possible, construct optional key condition from predicates containing _part_offset column. static void buildKeyConditionFromPartOffset( diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 467d2567df1..d69a00643f0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -96,29 +96,13 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num return to_mark - from_mark; } -size_t MergeTreeIndexGranularity::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows, size_t min_marks_to_read) const +size_t MergeTreeIndexGranularity::countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const { size_t rows_before_mark = getMarkStartingRow(from_mark); size_t last_row_pos = rows_before_mark + offset_in_rows + number_of_rows; auto it = std::upper_bound(marks_rows_partial_sums.begin(), marks_rows_partial_sums.end(), last_row_pos); size_t to_mark = it - marks_rows_partial_sums.begin(); - /// This is a heuristic to respect min_marks_to_read which is ignored by MergeTreeReadPool in case of remote disk. - /// See comment in IMergeTreeSelectAlgorithm. 
- if (min_marks_to_read) - { - // check overflow - size_t min_marks_to_read_2 = 0; - bool overflow = common::mulOverflow(min_marks_to_read, 2, min_marks_to_read_2); - - size_t to_mark_overwrite = 0; - if (!overflow) - overflow = common::addOverflow(from_mark, min_marks_to_read_2, to_mark_overwrite); - - if (!overflow && to_mark_overwrite < to_mark) - to_mark = to_mark_overwrite; - } - return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.h b/src/Storages/MergeTree/MergeTreeIndexGranularity.h index 78a1423ad7e..f66e721ec1e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.h @@ -37,7 +37,7 @@ public: /// |-----|---------------------------|----|----| /// ^------------------------^-----------^ //// from_mark offset_in_rows number_of_rows - size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows, size_t min_marks_to_read) const; + size_t countRowsForRows(size_t from_mark, size_t number_of_rows, size_t offset_in_rows) const; /// Total marks size_t getMarksCount() const; diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index a99172c4acd..4e5389f2869 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,6 +1,6 @@ +#include #include #include -#include #include #include #include @@ -8,13 +8,13 @@ #include #include #include -#include #include +#include #include #include -#include #include -#include +#include +#include namespace ProfileEvents @@ -102,6 +102,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_) : MergeTreeReadPoolBase( 
std::move(parts_), @@ -113,9 +114,12 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool( reader_settings_, column_names_, settings_, + params_, context_) , prefetch_threadpool(getContext()->getPrefetchThreadpool()) - , log(getLogger("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) + , log(getLogger( + "MergeTreePrefetchedReadPool(" + + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")")) { /// Tasks creation might also create a lost of readers - check they do not /// do any time consuming operations in ctor. @@ -304,25 +308,11 @@ MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::stealTask(size_t thread, Merge MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::createTask(ThreadTask & task, MergeTreeReadTask * previous_task) { if (task.isValidReadersFuture()) - { - auto size_predictor = task.read_info->shared_size_predictor - ? std::make_unique(*task.read_info->shared_size_predictor) - : nullptr; - - return std::make_unique(task.read_info, task.readers_future->get(), task.ranges, std::move(size_predictor)); - } + return MergeTreeReadPoolBase::createTask(task.read_info, task.readers_future->get(), task.ranges); return MergeTreeReadPoolBase::createTask(task.read_info, task.ranges, previous_task); } -size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names & columns_to_read) -{ - ColumnSize columns_size{}; - for (const auto & col_name : columns_to_read) - columns_size.add(part.getColumnSize(col_name)); - return columns_size.data_compressed / part.getMarksCount(); -} - void MergeTreePrefetchedReadPool::fillPerPartStatistics() { per_part_statistics.clear(); @@ -338,11 +328,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics() for (const auto & range : parts_ranges[i].ranges) part_stat.sum_marks += range.end - range.begin; - const auto & columns = 
settings[Setting::merge_tree_determine_task_size_by_prewhere_columns] && prewhere_info - ? prewhere_info->prewhere_actions.getRequiredColumnsNames() - : column_names; - - part_stat.approx_size_of_mark = getApproximateSizeOfGranule(*read_info.data_part, columns); + part_stat.approx_size_of_mark = read_info.approx_size_of_mark; auto update_stat_for_column = [&](const auto & column_name) { diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 1a709250937..b94d4ea113a 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -27,6 +27,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); String getName() const override { return "PrefetchedReadPool"; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 1e4922757f4..d266ad55824 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -45,6 +45,7 @@ MergeTreeReadPool::MergeTreeReadPool( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), @@ -56,6 +57,7 @@ MergeTreeReadPool::MergeTreeReadPool( reader_settings_, column_names_, settings_, + params_, context_) , backoff_settings{context_->getSettingsRef()} , backoff_state{pool_settings.threads} diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index c51dca315f9..a0425f0951c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -34,6 +34,7 @@ public: const MergeTreeReaderSettings & 
reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); ~MergeTreeReadPool() override = default; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 6ce1726398a..15a87f463b4 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -10,6 +10,7 @@ namespace Setting { extern const SettingsBool merge_tree_determine_task_size_by_prewhere_columns; extern const SettingsUInt64 merge_tree_min_bytes_per_task_for_remote_reading; + extern const SettingsUInt64 merge_tree_min_read_task_size; } namespace ErrorCodes @@ -27,6 +28,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & pool_settings_, + const MergeTreeReadTask::BlockSizeParams & block_size_params_, const ContextPtr & context_) : WithContext(context_) , parts_ranges(std::move(parts_)) @@ -38,6 +40,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( , reader_settings(reader_settings_) , column_names(column_names_) , pool_settings(pool_settings_) + , block_size_params(block_size_params_) , owned_mark_cache(context_->getGlobalContext()->getMarkCache()) , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? 
context_->getGlobalContext()->getUncompressedCache() : nullptr) , header(storage_snapshot->getSampleBlockForColumns(column_names)) @@ -46,7 +49,7 @@ MergeTreeReadPoolBase::MergeTreeReadPoolBase( fillPerPartInfos(context_->getSettingsRef()); } -static size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read) +static size_t getSizeOfColumns(const IMergeTreeDataPart & part, const Names & columns_to_read) { ColumnSize columns_size{}; for (const auto & col_name : columns_to_read) @@ -55,44 +58,67 @@ static size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & return columns_size.data_compressed ? columns_size.data_compressed : part.getBytesOnDisk(); } -static size_t calculateMinMarksPerTask( +/// Columns from different prewhere steps are read independently, so it makes sense to use the heaviest set of columns among them as an estimation. +static Names +getHeaviestSetOfColumnsAmongPrewhereSteps(const IMergeTreeDataPart & part, const std::vector & prewhere_steps_columns) +{ + const auto it = std::ranges::max_element( + prewhere_steps_columns, + [&](const auto & lhs, const auto & rhs) + { return getSizeOfColumns(part, lhs.getNames()) < getSizeOfColumns(part, rhs.getNames()); }); + return it->getNames(); +} + +static std::pair // (min_marks_per_task, avg_mark_bytes) +calculateMinMarksPerTask( const RangesInDataPart & part, const Names & columns_to_read, - PrewhereInfoPtr prewhere_info, + const std::vector & prewhere_steps_columns, const MergeTreeReadPoolBase::PoolSettings & pool_settings, const Settings & settings) { - size_t min_marks_per_task = pool_settings.min_marks_for_concurrent_read; - const size_t part_marks_count = part.getMarksCount(); - if (part_marks_count && part.data_part->isStoredOnRemoteDisk()) + size_t min_marks_per_task + = std::max(settings[Setting::merge_tree_min_read_task_size], pool_settings.min_marks_for_concurrent_read); + size_t avg_mark_bytes = 0; + /// It is important to obtain marks count from 
the part itself instead of calling `part.getMarksCount()`, + /// because `part` will report number of marks selected from this part by the query. + const size_t part_marks_count = part.data_part->getMarksCount(); + if (part_marks_count) { - /// We assume that most of the time prewhere does it's job good meaning that lion's share of the rows is filtered out. - /// Which means in turn that for most of the rows we will read only the columns from prewhere clause. - /// So it makes sense to use only them for the estimation. - const auto & columns = settings[Setting::merge_tree_determine_task_size_by_prewhere_columns] && prewhere_info - ? prewhere_info->prewhere_actions.getRequiredColumnsNames() - : columns_to_read; - const size_t part_compressed_bytes = getApproxSizeOfPart(*part.data_part, columns); - - const auto avg_mark_bytes = std::max(part_compressed_bytes / part_marks_count, 1); - const auto min_bytes_per_task = settings[Setting::merge_tree_min_bytes_per_task_for_remote_reading]; - /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. - /// We also create at least two tasks per thread to have something to steal from a slow thread. - const auto heuristic_min_marks - = std::min(pool_settings.sum_marks / pool_settings.threads / 2, min_bytes_per_task / avg_mark_bytes); - if (heuristic_min_marks > min_marks_per_task) + if (part.data_part->isStoredOnRemoteDisk()) { - LOG_TEST( - &Poco::Logger::get("MergeTreeReadPoolBase"), - "Increasing min_marks_per_task from {} to {} based on columns size heuristic", - min_marks_per_task, - heuristic_min_marks); - min_marks_per_task = heuristic_min_marks; + /// We assume that most of the time prewhere does it's job good meaning that lion's share of the rows is filtered out. + /// Which means in turn that for most of the rows we will read only the columns from prewhere clause. + /// So it makes sense to use only them for the estimation. 
+ const auto & columns = settings[Setting::merge_tree_determine_task_size_by_prewhere_columns] && !prewhere_steps_columns.empty() + ? getHeaviestSetOfColumnsAmongPrewhereSteps(*part.data_part, prewhere_steps_columns) + : columns_to_read; + const size_t part_compressed_bytes = getSizeOfColumns(*part.data_part, columns); + + avg_mark_bytes = std::max(part_compressed_bytes / part_marks_count, 1); + const auto min_bytes_per_task = settings[Setting::merge_tree_min_bytes_per_task_for_remote_reading]; + /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. + /// We also create at least two tasks per thread to have something to steal from a slow thread. + const auto heuristic_min_marks + = std::min(pool_settings.sum_marks / pool_settings.threads / 2, min_bytes_per_task / avg_mark_bytes); + if (heuristic_min_marks > min_marks_per_task) + { + LOG_TEST( + &Poco::Logger::get("MergeTreeReadPoolBase"), + "Increasing min_marks_per_task from {} to {} based on columns size heuristic", + min_marks_per_task, + heuristic_min_marks); + min_marks_per_task = heuristic_min_marks; + } + } + else + { + avg_mark_bytes = std::max(getSizeOfColumns(*part.data_part, columns_to_read) / part_marks_count, 1); } } LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); - return min_marks_per_task; + return {min_marks_per_task, avg_mark_bytes}; } void MergeTreeReadPoolBase::fillPerPartInfos(const Settings & settings) @@ -159,8 +185,8 @@ void MergeTreeReadPoolBase::fillPerPartInfos(const Settings & settings) } is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk()); - read_task_info.min_marks_per_task - = calculateMinMarksPerTask(part_with_ranges, column_names, prewhere_info, pool_settings, settings); + std::tie(read_task_info.min_marks_per_task, read_task_info.approx_size_of_mark) + = calculateMinMarksPerTask(part_with_ranges, column_names, 
read_task_info.task_columns.pre_columns, pool_settings, settings); per_part_infos.push_back(std::make_shared(std::move(read_task_info))); } } @@ -182,15 +208,20 @@ std::vector MergeTreeReadPoolBase::getPerPartSumMarks() const return per_part_sum_marks; } -MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( - MergeTreeReadTaskInfoPtr read_info, - MarkRanges ranges, - MergeTreeReadTask * previous_task) const +MergeTreeReadTaskPtr +MergeTreeReadPoolBase::createTask(MergeTreeReadTaskInfoPtr read_info, MergeTreeReadTask::Readers task_readers, MarkRanges ranges) const { auto task_size_predictor = read_info->shared_size_predictor ? std::make_unique(*read_info->shared_size_predictor) : nullptr; /// make a copy + return std::make_unique( + read_info, std::move(task_readers), std::move(ranges), block_size_params, std::move(task_size_predictor)); +} + +MergeTreeReadTaskPtr +MergeTreeReadPoolBase::createTask(MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, MergeTreeReadTask * previous_task) const +{ auto get_part_name = [](const auto & task_info) -> String { const auto & data_part = task_info.data_part; @@ -229,11 +260,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask( task_readers = previous_task->releaseReaders(); } - return std::make_unique( - read_info, - std::move(task_readers), - std::move(ranges), - std::move(task_size_predictor)); + return createTask(read_info, std::move(task_readers), std::move(ranges)); } MergeTreeReadTask::Extras MergeTreeReadPoolBase::getExtras() const diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.h b/src/Storages/MergeTree/MergeTreeReadPoolBase.h index 7f9106d476e..19b26156433 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.h @@ -33,6 +33,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); Block 
getHeader() const override { return header; } @@ -48,6 +49,7 @@ protected: const MergeTreeReaderSettings reader_settings; const Names column_names; const PoolSettings pool_settings; + const MergeTreeReadTask::BlockSizeParams block_size_params; const MarkCachePtr owned_mark_cache; const UncompressedCachePtr owned_uncompressed_cache; const Block header; @@ -55,6 +57,8 @@ protected: void fillPerPartInfos(const Settings & settings); std::vector getPerPartSumMarks() const; + MergeTreeReadTaskPtr createTask(MergeTreeReadTaskInfoPtr read_info, MergeTreeReadTask::Readers task_readers, MarkRanges ranges) const; + MergeTreeReadTaskPtr createTask( MergeTreeReadTaskInfoPtr read_info, MarkRanges ranges, diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp index 60f127acdae..c4244ecd982 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.cpp @@ -20,6 +20,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), @@ -31,6 +32,7 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder( reader_settings_, column_names_, settings_, + params_, context_) , has_limit_below_one_block(has_limit_below_one_block_) , read_type(read_type_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h index a3668acb170..41f3ab1061c 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolInOrder.h @@ -19,6 +19,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); String getName() 
const override { return "ReadPoolInOrder"; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 075c0b1042b..8f06fc312c2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -112,6 +112,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), @@ -123,6 +124,7 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( reader_settings_, column_names_, settings_, + params_, context_) , extension(std::move(extension_)) , coordination_mode(CoordinationMode::Default) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h index b9f2e133c4a..63816340eb1 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h @@ -19,6 +19,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); ~MergeTreeReadPoolParallelReplicas() override = default; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 8ff2a4f31ee..f13da426c45 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -26,6 +26,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd const MergeTreeReaderSettings & reader_settings_, const Names & 
column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_) : MergeTreeReadPoolBase( std::move(parts_), @@ -37,6 +38,7 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd reader_settings_, column_names_, settings_, + params_, context_) , extension(std::move(extension_)) , mode(mode_) diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h index 98a4d95768a..a05dc54b529 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h @@ -20,6 +20,7 @@ public: const MergeTreeReaderSettings & reader_settings_, const Names & column_names_, const PoolSettings & settings_, + const MergeTreeReadTask::BlockSizeParams & params_, const ContextPtr & context_); String getName() const override { return "ReadPoolParallelReplicasInOrder"; } diff --git a/src/Storages/MergeTree/MergeTreeReadTask.cpp b/src/Storages/MergeTree/MergeTreeReadTask.cpp index dd057dc9984..72fddb93a6d 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.cpp +++ b/src/Storages/MergeTree/MergeTreeReadTask.cpp @@ -26,10 +26,12 @@ MergeTreeReadTask::MergeTreeReadTask( MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, + const BlockSizeParams & block_size_params_, MergeTreeBlockSizePredictorPtr size_predictor_) : info(std::move(info_)) , readers(std::move(readers_)) , mark_ranges(std::move(mark_ranges_)) + , block_size_params(block_size_params_) , size_predictor(std::move(size_predictor_)) { } @@ -112,30 +114,31 @@ void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere range_readers = createRangeReaders(readers, prewhere_actions); } -UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const +UInt64 MergeTreeReadTask::estimateNumRows() const { if (!size_predictor) { - 
if (params.preferred_block_size_bytes) + if (block_size_params.preferred_block_size_bytes) throw Exception(ErrorCodes::LOGICAL_ERROR, "Size predictor is not set, it might lead to a performance degradation"); - return static_cast(params.max_block_size_rows); + return static_cast(block_size_params.max_block_size_rows); } /// Calculates number of rows will be read using preferred_block_size_bytes. /// Can't be less than avg_index_granularity. - size_t rows_to_read = size_predictor->estimateNumRows(params.preferred_block_size_bytes); + size_t rows_to_read = size_predictor->estimateNumRows(block_size_params.preferred_block_size_bytes); if (!rows_to_read) return rows_to_read; auto total_row_in_current_granule = range_readers.main.numRowsInCurrentGranule(); rows_to_read = std::max(total_row_in_current_granule, rows_to_read); - if (params.preferred_max_column_in_block_size_bytes) + if (block_size_params.preferred_max_column_in_block_size_bytes) { /// Calculates number of rows will be read using preferred_max_column_in_block_size_bytes. 
- auto rows_to_read_for_max_size_column = size_predictor->estimateNumRowsForMaxSizeColumn(params.preferred_max_column_in_block_size_bytes); + auto rows_to_read_for_max_size_column + = size_predictor->estimateNumRowsForMaxSizeColumn(block_size_params.preferred_max_column_in_block_size_bytes); - double filtration_ratio = std::max(params.min_filtration_ratio, 1.0 - size_predictor->filtered_rows_ratio); + double filtration_ratio = std::max(block_size_params.min_filtration_ratio, 1.0 - size_predictor->filtered_rows_ratio); auto rows_to_read_for_max_size_column_with_filtration = static_cast(rows_to_read_for_max_size_column / filtration_ratio); @@ -148,16 +151,16 @@ UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const return rows_to_read; const auto & index_granularity = info->data_part->index_granularity; - return index_granularity.countRowsForRows(range_readers.main.currentMark(), rows_to_read, range_readers.main.numReadRowsInCurrentGranule(), params.min_marks_to_read); + return index_granularity.countRowsForRows(range_readers.main.currentMark(), rows_to_read, range_readers.main.numReadRowsInCurrentGranule()); } -MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read(const BlockSizeParams & params) +MergeTreeReadTask::BlockAndProgress MergeTreeReadTask::read() { if (size_predictor) size_predictor->startBlock(); - UInt64 recommended_rows = estimateNumRows(params); - UInt64 rows_to_read = std::max(static_cast(1), std::min(params.max_block_size_rows, recommended_rows)); + UInt64 recommended_rows = estimateNumRows(); + UInt64 rows_to_read = std::max(static_cast(1), std::min(block_size_params.max_block_size_rows, recommended_rows)); auto read_result = range_readers.main.read(rows_to_read, mark_ranges); diff --git a/src/Storages/MergeTree/MergeTreeReadTask.h b/src/Storages/MergeTree/MergeTreeReadTask.h index 748babb5b4c..2853cc39c51 100644 --- a/src/Storages/MergeTree/MergeTreeReadTask.h +++ b/src/Storages/MergeTree/MergeTreeReadTask.h @@ -70,6 
+70,7 @@ struct MergeTreeReadTaskInfo VirtualFields const_virtual_fields; /// The amount of data to read per task based on size of the queried columns. size_t min_marks_per_task = 0; + size_t approx_size_of_mark = 0; }; using MergeTreeReadTaskInfoPtr = std::shared_ptr; @@ -110,7 +111,6 @@ public: UInt64 max_block_size_rows = DEFAULT_BLOCK_SIZE; UInt64 preferred_block_size_bytes = 1000000; UInt64 preferred_max_column_in_block_size_bytes = 0; - UInt64 min_marks_to_read = 0; double min_filtration_ratio = 0.00001; }; @@ -127,12 +127,12 @@ public: MergeTreeReadTaskInfoPtr info_, Readers readers_, MarkRanges mark_ranges_, - + const BlockSizeParams & block_size_params_, MergeTreeBlockSizePredictorPtr size_predictor_); void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions); - BlockAndProgress read(const BlockSizeParams & params); + BlockAndProgress read(); bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); } const MergeTreeReadTaskInfo & getInfo() const { return *info; } @@ -145,7 +145,7 @@ public: static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions); private: - UInt64 estimateNumRows(const BlockSizeParams & params) const; + UInt64 estimateNumRows() const; /// Shared information required for reading. 
MergeTreeReadTaskInfoPtr info; @@ -160,6 +160,8 @@ private: /// Ranges to read from data_part MarkRanges mark_ranges; + BlockSizeParams block_size_params; + /// Used to satistfy preferred_block_size_bytes limitation MergeTreeBlockSizePredictorPtr size_predictor; }; diff --git a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp index bf97d269dc6..213eab52ad8 100644 --- a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.cpp @@ -30,7 +30,8 @@ MergeTreeReadTaskPtr MergeTreeInReverseOrderSelectAlgorithm::getNewTask(IMergeTr return pool.getTask(part_idx, previous_task); } -MergeTreeReadTask::BlockAndProgress MergeTreeInReverseOrderSelectAlgorithm::readFromTask(MergeTreeReadTask & task, const BlockSizeParams & params) +MergeTreeReadTask::BlockAndProgress +MergeTreeInReverseOrderSelectAlgorithm::readFromTask(MergeTreeReadTask & task) { MergeTreeReadTask::BlockAndProgress res; @@ -42,7 +43,7 @@ MergeTreeReadTask::BlockAndProgress MergeTreeInReverseOrderSelectAlgorithm::read } while (!task.isFinished()) - chunks.push_back(task.read(params)); + chunks.push_back(task.read()); if (chunks.empty()) return {}; diff --git a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h index afc8032bb99..eeaefb0dc4f 100644 --- a/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h +++ b/src/Storages/MergeTree/MergeTreeSelectAlgorithms.h @@ -21,7 +21,7 @@ public: virtual bool needNewTask(const MergeTreeReadTask & task) const = 0; virtual MergeTreeReadTaskPtr getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) = 0; - virtual BlockAndProgress readFromTask(MergeTreeReadTask & task, const BlockSizeParams & params) = 0; + virtual BlockAndProgress readFromTask(MergeTreeReadTask & task) = 0; }; using MergeTreeSelectAlgorithmPtr = std::unique_ptr; @@ -35,7 +35,7 @@ public: bool needNewTask(const MergeTreeReadTask & 
task) const override { return task.isFinished(); } MergeTreeReadTaskPtr getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override { return pool.getTask(thread_idx, previous_task); } - BlockAndProgress readFromTask(MergeTreeReadTask & task, const BlockSizeParams & params) override { return task.read(params); } + BlockAndProgress readFromTask(MergeTreeReadTask & task) override { return task.read(); } private: const size_t thread_idx; @@ -50,7 +50,7 @@ public: bool needNewTask(const MergeTreeReadTask & task) const override { return task.isFinished(); } MergeTreeReadTaskPtr getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override; - MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask & task, const BlockSizeParams & params) override { return task.read(params); } + MergeTreeReadTask::BlockAndProgress readFromTask(MergeTreeReadTask & task) override { return task.read(); } private: const size_t part_idx; @@ -65,7 +65,7 @@ public: bool needNewTask(const MergeTreeReadTask & task) const override { return chunks.empty() && task.isFinished(); } MergeTreeReadTaskPtr getNewTask(IMergeTreeReadPool & pool, MergeTreeReadTask * previous_task) override; - BlockAndProgress readFromTask(MergeTreeReadTask & task, const BlockSizeParams & params) override; + BlockAndProgress readFromTask(MergeTreeReadTask & task) override; private: const size_t part_idx; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 76bcf41d6d8..5efd33ce09a 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -86,7 +86,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeSelectAlgorithmPtr algorithm_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, - const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & 
reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) @@ -94,7 +93,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) , reader_settings(reader_settings_) - , block_size_params(block_size_params_) , result_header(transformHeader(pool->getHeader(), prewhere_info)) { if (reader_settings.apply_deleted_mask) @@ -190,7 +188,7 @@ ChunkAndProgress MergeTreeSelectProcessor::read() if (!task->getMainRangeReader().isInitialized()) initializeRangeReaders(); - auto res = algorithm->readFromTask(*task, block_size_params); + auto res = algorithm->readFromTask(*task); if (res.row_count) { diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 8a9e3580a9f..33069a78e33 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -57,7 +57,6 @@ public: MergeTreeSelectAlgorithmPtr algorithm_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, - const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_); String getName() const; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index e76c4628681..8c6aafe48f2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -34,208 +34,208 @@ namespace ErrorCodes * and should not be changed by the user without a reason. */ -#define MERGE_TREE_SETTINGS(M, ALIAS) \ - M(UInt64, min_compress_block_size, 0, "When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. 
If this setting is not set, the corresponding global setting is used.", 0) \ - M(UInt64, max_compress_block_size, 0, "Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.", 0) \ - M(UInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \ - M(UInt64, max_digestion_size_per_segment, 256_MiB, "Max number of bytes to digest per segment to build GIN index.", 0) \ +#define MERGE_TREE_SETTINGS(DECLARE, ALIAS) \ + DECLARE(UInt64, min_compress_block_size, 0, "When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.", 0) \ + DECLARE(UInt64, max_compress_block_size, 0, "Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.", 0) \ + DECLARE(UInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \ + DECLARE(UInt64, max_digestion_size_per_segment, 256_MiB, "Max number of bytes to digest per segment to build GIN index.", 0) \ \ /** Data storing format settings. */ \ - M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ - M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. 
If >= 1, columns will be always written in full serialization.", 0) \ - M(Bool, replace_long_file_name_to_hash, true, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ - M(UInt64, max_file_name_length, 127, "The maximal length of the file name to keep it as is without hashing", 0) \ - M(UInt64, min_bytes_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, min_rows_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ - M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ - M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ + DECLARE(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ + DECLARE(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ + DECLARE(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. 
If >= 1, columns will be always written in full serialization.", 0) \ + DECLARE(Bool, replace_long_file_name_to_hash, true, "If the file name for column is too long (more than 'max_file_name_length' bytes) replace it to SipHash128", 0) \ + DECLARE(UInt64, max_file_name_length, 127, "The maximal length of the file name to keep it as is without hashing", 0) \ + DECLARE(UInt64, min_bytes_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, min_rows_for_full_part_storage, 0, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ + DECLARE(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + DECLARE(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \ \ /** Merge selector settings. */ \ - M(UInt64, merge_selector_blurry_base_scale_factor, 0, "Controls when the logic kicks in relatively to the number of parts in partition. The bigger the factor the more belated reaction will be.", 0) \ - M(UInt64, merge_selector_window_size, 1000, "How many parts to look at once.", 0) \ + DECLARE(UInt64, merge_selector_blurry_base_scale_factor, 0, "Controls when the logic kicks in relatively to the number of parts in partition. The bigger the factor the more belated reaction will be.", 0) \ + DECLARE(UInt64, merge_selector_window_size, 1000, "How many parts to look at once.", 0) \ \ /** Merge settings. */ \ - M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ - M(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. By default has the same value as `index_granularity_bytes`.", 0) \ - M(UInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).", 0) \ - M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \ - M(UInt64, max_replicated_merges_in_queue, 1000, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ - M(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ - M(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ - M(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ - M(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 20, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ - M(UInt64, max_number_of_mutations_for_replica, 0, "Limit the number of part mutations per replica to the specified amount. 
Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings).", 0) \ - M(UInt64, max_number_of_merges_with_ttl_in_pool, 2, "When there is more than specified number of merges with TTL entries in pool, do not assign new merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ - M(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ - M(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories. You should not lower this value because merges and mutations may not be able to work with low value of this setting.", 0) \ - M(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ - M(UInt64, min_rows_to_fsync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ - M(UInt64, min_compressed_bytes_to_fsync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ - M(UInt64, min_compressed_bytes_to_fsync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ - M(Bool, fsync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ - M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ - M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ - M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). 
Doesn't affect OPTIMIZE FINAL query.", 0) \ - M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ - M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ - M(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \ - M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ - M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ - M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ - M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ - M(UInt64, number_of_free_entries_in_pool_to_execute_optimize_entire_partition, 25, "When there is less than specified number of free entries in pool, do not try to execute optimize entire partition with a merge (this merge is created when set min_age_to_force_merge_seconds > 0 and min_age_to_force_merge_on_partition_only = true). 
This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ - M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ - M(UInt64, replicated_max_mutations_in_one_entry, 10000, "Max number of mutation commands that can be merged together and executed in one MUTATE_PART entry (0 means unlimited)", 0) \ - M(UInt64, number_of_mutations_to_delay, 500, "If table has at least that many unfinished mutations, artificially slow down mutations of table. Disabled if set to 0", 0) \ - M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ - M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ - M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ - M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ - M(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ - M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ - M(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \ - M(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", 0) \ + DECLARE(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. 
By default has the same value as `index_granularity`.", 0) \ + DECLARE(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. By default has the same value as `index_granularity_bytes`.", 0) \ + DECLARE(UInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).", 0) \ + DECLARE(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \ + DECLARE(UInt64, max_replicated_merges_in_queue, 1000, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + DECLARE(UInt64, max_replicated_mutations_in_queue, 8, "How many tasks of mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + DECLARE(UInt64, max_replicated_merges_with_ttl_in_queue, 1, "How many tasks of merging parts with TTL are allowed simultaneously in ReplicatedMergeTree queue.", 0) \ + DECLARE(UInt64, number_of_free_entries_in_pool_to_lower_max_size_of_merge, 8, "When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to process - not filling the pool with long running merges.", 0) \ + DECLARE(UInt64, number_of_free_entries_in_pool_to_execute_mutation, 20, "When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ + DECLARE(UInt64, max_number_of_mutations_for_replica, 0, "Limit the number of part mutations per replica to the specified amount. 
Zero means no limit on the number of mutations per replica (the execution can still be constrained by other settings).", 0) \ + DECLARE(UInt64, max_number_of_merges_with_ttl_in_pool, 2, "When there is more than specified number of merges with TTL entries in pool, do not assign new merge with TTL. This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ + DECLARE(Seconds, old_parts_lifetime, 8 * 60, "How many seconds to keep obsolete parts.", 0) \ + DECLARE(Seconds, temporary_directories_lifetime, 86400, "How many seconds to keep tmp_-directories. You should not lower this value because merges and mutations may not be able to work with low value of this setting.", 0) \ + DECLARE(Seconds, lock_acquire_timeout_for_background_operations, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "For background operations like merges, mutations etc. How many seconds before failing to acquire table locks.", 0) \ + DECLARE(UInt64, min_rows_to_fsync_after_merge, 0, "Minimal number of rows to do fsync for part after merge (0 - disabled)", 0) \ + DECLARE(UInt64, min_compressed_bytes_to_fsync_after_merge, 0, "Minimal number of compressed bytes to do fsync for part after merge (0 - disabled)", 0) \ + DECLARE(UInt64, min_compressed_bytes_to_fsync_after_fetch, 0, "Minimal number of compressed bytes to do fsync for part after fetch (0 - disabled)", 0) \ + DECLARE(Bool, fsync_after_insert, false, "Do fsync for every inserted part. Significantly decreases performance of inserts, not recommended to use with wide parts.", 0) \ + DECLARE(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ + DECLARE(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ + DECLARE(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). 
Doesn't affect OPTIMIZE FINAL query.", 0) \ + DECLARE(UInt64, merge_selecting_sleep_ms, 5000, "Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + DECLARE(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + DECLARE(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \ + DECLARE(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ + DECLARE(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ + DECLARE(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ + DECLARE(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ + DECLARE(UInt64, number_of_free_entries_in_pool_to_execute_optimize_entire_partition, 25, "When there is less than specified number of free entries in pool, do not try to execute optimize entire partition with a merge (this merge is created when set min_age_to_force_merge_seconds > 0 and min_age_to_force_merge_on_partition_only = true). 
This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ + DECLARE(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ + DECLARE(UInt64, replicated_max_mutations_in_one_entry, 10000, "Max number of mutation commands that can be merged together and executed in one MUTATE_PART entry (0 means unlimited)", 0) \ + DECLARE(UInt64, number_of_mutations_to_delay, 500, "If table has at least that many unfinished mutations, artificially slow down mutations of table. Disabled if set to 0", 0) \ + DECLARE(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ + DECLARE(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + DECLARE(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + DECLARE(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ + DECLARE(String, merge_workload, "", "Name of workload to be used to access resources for merges", 0) \ + DECLARE(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \ + DECLARE(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \ + DECLARE(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", 0) \ \ /** Inserts settings. */ \ - M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. 
Disabled if set to 0", 0) \ - M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ - M(UInt64, parts_to_throw_insert, 3000, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ - M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ - M(UInt64, max_avg_part_size_for_too_many_parts, 1ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ - M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ - M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ - M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ - M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background.", 0) \ - M(Bool, add_implicit_sign_column_constraint_for_collapsing_engine, false, "If true, add implicit constraint for sign column for CollapsingMergeTree engine.", 0) \ - M(Milliseconds, sleep_before_commit_local_part_in_replicated_table_ms, 0, "For testing. 
Do not change it.", 0) \ - M(Bool, optimize_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ - M(Bool, use_adaptive_write_buffer_for_dynamic_subcolumns, true, "Allow to use adaptive writer buffers during writing dynamic subcolumns to reduce memory usage", 0) \ - M(UInt64, adaptive_write_buffer_initial_size, 16 * 1024, "Initial size of an adaptive write buffer", 0) \ - M(UInt64, min_free_disk_bytes_to_perform_insert, 0, "Minimum free disk space bytes to perform an insert.", 0) \ - M(Float, min_free_disk_ratio_to_perform_insert, 0.0, "Minimum free disk space ratio to perform an insert.", 0) \ + DECLARE(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ + DECLARE(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ + DECLARE(UInt64, parts_to_throw_insert, 3000, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ + DECLARE(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ + DECLARE(UInt64, max_avg_part_size_for_too_many_parts, 1ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. 
This does not affect the thresholds on inactive parts or total parts.", 0) \ + DECLARE(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ + DECLARE(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ + DECLARE(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ + DECLARE(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background.", 0) \ + DECLARE(Bool, add_implicit_sign_column_constraint_for_collapsing_engine, false, "If true, add implicit constraint for sign column for CollapsingMergeTree engine.", 0) \ + DECLARE(Milliseconds, sleep_before_commit_local_part_in_replicated_table_ms, 0, "For testing. Do not change it.", 0) \ + DECLARE(Bool, optimize_row_order, false, "Allow reshuffling of rows during part inserts and merges to improve the compressibility of the new part", 0) \ + DECLARE(Bool, use_adaptive_write_buffer_for_dynamic_subcolumns, true, "Allow to use adaptive writer buffers during writing dynamic subcolumns to reduce memory usage", 0) \ + DECLARE(UInt64, adaptive_write_buffer_initial_size, 16 * 1024, "Initial size of an adaptive write buffer", 0) \ + DECLARE(UInt64, min_free_disk_bytes_to_perform_insert, 0, "Minimum free disk space bytes to perform an insert.", 0) \ + DECLARE(Float, min_free_disk_ratio_to_perform_insert, 0.0, "Minimum free disk space ratio to perform an insert.", 0) \ \ /* Part removal settings. 
*/ \ - M(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ + DECLARE(UInt64, simultaneous_parts_removal_limit, 0, "Maximum number of parts to remove during one CleanupThread iteration (0 means unlimited).", 0) \ \ /** Replication settings. */ \ - M(UInt64, replicated_deduplication_window, 1000, "How many last blocks of hashes should be kept in ZooKeeper (old blocks will be deleted).", 0) \ - M(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \ - M(UInt64, replicated_deduplication_window_for_async_inserts, 10000, "How many last hash values of async_insert blocks should be kept in ZooKeeper (old blocks will be deleted).", 0) \ - M(UInt64, replicated_deduplication_window_seconds_for_async_inserts, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window_for_async_inserts\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \ - M(Milliseconds, async_block_ids_cache_update_wait_ms, 100, "How long each insert iteration will wait for async_block_ids_cache update", 0) \ - M(Bool, use_async_block_ids_cache, true, "Use in-memory cache to filter duplicated async inserts based on block ids", 0) \ - M(UInt64, max_replicated_logs_to_keep, 1000, "How many records may be in log, if there is inactive replica. 
Inactive replica becomes lost when when this number exceed.", 0) \ - M(UInt64, min_replicated_logs_to_keep, 10, "Keep about this number of last records in ZooKeeper log, even if they are obsolete. It doesn't affect work of tables: used only to diagnose ZooKeeper log before cleaning.", 0) \ - M(Seconds, prefer_fetch_merged_part_time_threshold, 3600, "If time passed after replication log entry creation exceeds this threshold and sum size of parts is greater than \"prefer_fetch_merged_part_size_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ - M(UInt64, prefer_fetch_merged_part_size_threshold, 10ULL * 1024 * 1024 * 1024, "If sum size of parts exceeds this threshold and time passed after replication log entry creation is greater than \"prefer_fetch_merged_part_time_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ - M(Seconds, execute_merges_on_single_replica_time_threshold, 0, "When greater than zero only a single replica starts the merge immediately, others wait up to that amount of time to download the result instead of doing merges locally. 
If the chosen replica doesn't finish the merge during that amount of time, fallback to standard behavior happens.", 0) \ - M(Seconds, remote_fs_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediately if merged part on shared storage and 'allow_remote_fs_zero_copy_replication' is enabled.", 0) \ - M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \ - M(Bool, always_fetch_merged_part, false, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \ - M(UInt64, max_suspicious_broken_parts, 100, "Max broken parts, if more - deny automatic deletion.", 0) \ - M(UInt64, max_suspicious_broken_parts_bytes, 1ULL * 1024 * 1024 * 1024, "Max size of all broken parts, if more - deny automatic deletion.", 0) \ - M(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \ - M(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \ - M(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \ - M(Bool, replicated_can_become_leader, true, "If true, Replicated tables replicas on this node will try to acquire leadership.", 0) \ - M(Seconds, zookeeper_session_expiration_check_period, 60, "ZooKeeper session expiration check period, in seconds.", 0) \ - M(Seconds, initialization_retry_period, 60, "Retry period for table initialization, in seconds.", 0) \ - M(Bool, detach_old_local_parts_when_cloning_replica, true, "Do not remove old local parts when repairing lost replica.", 0) \ - M(Bool, detach_not_byte_identical_parts, false, 
"Do not remove non byte-idential parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \ - M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ - M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ - M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for required amount time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \ - M(Float, fault_probability_before_part_commit, 0, "For testing. Do not change it.", 0) \ - M(Float, fault_probability_after_part_commit, 0, "For testing. Do not change it.", 0) \ - M(Bool, shared_merge_tree_disable_merges_and_mutations_assignment, false, "Only available in ClickHouse Cloud", 0) \ - M(Float, shared_merge_tree_partitions_hint_ratio_to_reload_merge_pred_for_mutations, 0.5, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, shared_merge_tree_parts_load_batch_size, 32, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, replicated_deduplication_window, 1000, "How many last blocks of hashes should be kept in ZooKeeper (old blocks will be deleted).", 0) \ + DECLARE(UInt64, replicated_deduplication_window_seconds, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". 
You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \ + DECLARE(UInt64, replicated_deduplication_window_for_async_inserts, 10000, "How many last hash values of async_insert blocks should be kept in ZooKeeper (old blocks will be deleted).", 0) \ + DECLARE(UInt64, replicated_deduplication_window_seconds_for_async_inserts, 7 * 24 * 60 * 60 /* one week */, "Similar to \"replicated_deduplication_window_for_async_inserts\", but determines old blocks by their lifetime. Hash of an inserted block will be deleted (and the block will not be deduplicated after) if it outside of one \"window\". You can set very big replicated_deduplication_window to avoid duplicating INSERTs during that period of time.", 0) \ + DECLARE(Milliseconds, async_block_ids_cache_update_wait_ms, 100, "How long each insert iteration will wait for async_block_ids_cache update", 0) \ + DECLARE(Bool, use_async_block_ids_cache, true, "Use in-memory cache to filter duplicated async inserts based on block ids", 0) \ + DECLARE(UInt64, max_replicated_logs_to_keep, 1000, "How many records may be in log, if there is inactive replica. Inactive replica becomes lost when when this number exceed.", 0) \ + DECLARE(UInt64, min_replicated_logs_to_keep, 10, "Keep about this number of last records in ZooKeeper log, even if they are obsolete. It doesn't affect work of tables: used only to diagnose ZooKeeper log before cleaning.", 0) \ + DECLARE(Seconds, prefer_fetch_merged_part_time_threshold, 3600, "If time passed after replication log entry creation exceeds this threshold and sum size of parts is greater than \"prefer_fetch_merged_part_size_threshold\", prefer fetching merged part from replica instead of doing merge locally. 
To speed up very long merges.", 0) \ + DECLARE(UInt64, prefer_fetch_merged_part_size_threshold, 10ULL * 1024 * 1024 * 1024, "If sum size of parts exceeds this threshold and time passed after replication log entry creation is greater than \"prefer_fetch_merged_part_time_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ + DECLARE(Seconds, execute_merges_on_single_replica_time_threshold, 0, "When greater than zero only a single replica starts the merge immediately, others wait up to that amount of time to download the result instead of doing merges locally. If the chosen replica doesn't finish the merge during that amount of time, fallback to standard behavior happens.", 0) \ + DECLARE(Seconds, remote_fs_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediately if merged part on shared storage and 'allow_remote_fs_zero_copy_replication' is enabled.", 0) \ + DECLARE(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \ + DECLARE(Bool, always_fetch_merged_part, false, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \ + DECLARE(UInt64, max_suspicious_broken_parts, 100, "Max broken parts, if more - deny automatic deletion.", 0) \ + DECLARE(UInt64, max_suspicious_broken_parts_bytes, 1ULL * 1024 * 1024 * 1024, "Max size of all broken parts, if more - deny automatic deletion.", 0) \ + DECLARE(UInt64, max_files_to_modify_in_alter_columns, 75, "Not apply ALTER if number of files for modification(deletion, addition) more than this.", 0) \ + DECLARE(UInt64, max_files_to_remove_in_alter_columns, 50, "Not apply ALTER, if number of files for deletion more than this.", 0) \ + 
DECLARE(Float, replicated_max_ratio_of_wrong_parts, 0.5, "If ratio of wrong parts to total number of parts is less than this - allow to start.", 0) \ + DECLARE(Bool, replicated_can_become_leader, true, "If true, Replicated tables replicas on this node will try to acquire leadership.", 0) \ + DECLARE(Seconds, zookeeper_session_expiration_check_period, 60, "ZooKeeper session expiration check period, in seconds.", 0) \ + DECLARE(Seconds, initialization_retry_period, 60, "Retry period for table initialization, in seconds.", 0) \ + DECLARE(Bool, detach_old_local_parts_when_cloning_replica, true, "Do not remove old local parts when repairing lost replica.", 0) \ + DECLARE(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-idential parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \ + DECLARE(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ + DECLARE(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ + DECLARE(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for required amount time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \ + DECLARE(Float, fault_probability_before_part_commit, 0, "For testing. Do not change it.", 0) \ + DECLARE(Float, fault_probability_after_part_commit, 0, "For testing. 
Do not change it.", 0) \ + DECLARE(Bool, shared_merge_tree_disable_merges_and_mutations_assignment, false, "Only available in ClickHouse Cloud", 0) \ + DECLARE(Float, shared_merge_tree_partitions_hint_ratio_to_reload_merge_pred_for_mutations, 0.5, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, shared_merge_tree_parts_load_batch_size, 32, "Only available in ClickHouse Cloud", 0) \ \ /** Check delay of replicas settings. */ \ - M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ - M(UInt64, cleanup_delay_period, 30, "Minimum period to clean old queue logs, blocks hashes and parts.", 0) \ - M(UInt64, max_cleanup_delay_period, 300, "Maximum period to clean old queue logs, blocks hashes and parts.", 0) \ - M(UInt64, cleanup_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to cleanup_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables.", 0) \ - M(UInt64, cleanup_thread_preferred_points_per_iteration, 150, "Preferred batch size for background cleanup (points are abstract but 1 point is approximately equivalent to 1 inserted block).", 0) \ - M(UInt64, cleanup_threads, 128, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, kill_delay_period, 30, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, kill_delay_period_random_add, 10, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, kill_threads, 128, "Only available in ClickHouse Cloud", 0) \ - M(UInt64, min_relative_delay_to_close, 300, "Minimal delay from other replicas to close, stop serving requests and not return Ok during status check.", 0) \ - M(UInt64, min_absolute_delay_to_close, 0, "Minimal absolute delay to close, stop serving requests and not return Ok during status check.", 0) \ - M(UInt64, enable_vertical_merge_algorithm, 1, "Enable usage of Vertical merge algorithm.", 0) \ - M(UInt64, 
vertical_merge_algorithm_min_rows_to_activate, 16 * 8192, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \ - M(UInt64, vertical_merge_algorithm_min_bytes_to_activate, 0, "Minimal (approximate) uncompressed size in bytes in merging parts to activate Vertical merge algorithm.", 0) \ - M(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \ - M(Bool, vertical_merge_remote_filesystem_prefetch, true, "If true prefetching of data from remote filesystem is used for the next column during merge", 0) \ - M(UInt64, max_postpone_time_for_failed_mutations_ms, 5ULL * 60 * 1000, "The maximum postpone time for failed mutations.", 0) \ + DECLARE(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ + DECLARE(UInt64, cleanup_delay_period, 30, "Minimum period to clean old queue logs, blocks hashes and parts.", 0) \ + DECLARE(UInt64, max_cleanup_delay_period, 300, "Maximum period to clean old queue logs, blocks hashes and parts.", 0) \ + DECLARE(UInt64, cleanup_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to cleanup_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables.", 0) \ + DECLARE(UInt64, cleanup_thread_preferred_points_per_iteration, 150, "Preferred batch size for background cleanup (points are abstract but 1 point is approximately equivalent to 1 inserted block).", 0) \ + DECLARE(UInt64, cleanup_threads, 128, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, kill_delay_period, 30, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, kill_delay_period_random_add, 10, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, kill_threads, 128, "Only available in ClickHouse Cloud", 0) \ + DECLARE(UInt64, min_relative_delay_to_close, 300, "Minimal 
delay from other replicas to close, stop serving requests and not return Ok during status check.", 0) \ + DECLARE(UInt64, min_absolute_delay_to_close, 0, "Minimal absolute delay to close, stop serving requests and not return Ok during status check.", 0) \ + DECLARE(UInt64, enable_vertical_merge_algorithm, 1, "Enable usage of Vertical merge algorithm.", 0) \ + DECLARE(UInt64, vertical_merge_algorithm_min_rows_to_activate, 16 * 8192, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \ + DECLARE(UInt64, vertical_merge_algorithm_min_bytes_to_activate, 0, "Minimal (approximate) uncompressed size in bytes in merging parts to activate Vertical merge algorithm.", 0) \ + DECLARE(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \ + DECLARE(Bool, vertical_merge_remote_filesystem_prefetch, true, "If true prefetching of data from remote filesystem is used for the next column during merge", 0) \ + DECLARE(UInt64, max_postpone_time_for_failed_mutations_ms, 5ULL * 60 * 1000, "The maximum postpone time for failed mutations.", 0) \ \ /** Compatibility settings */ \ - M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ - M(Bool, compatibility_allow_sampling_expression_not_in_primary_key, false, "Allow to create a table with sampling expression not in primary key. This is needed only to temporarily allow to run the server with wrong tables for backward compatibility.", 0) \ - M(Bool, use_minimalistic_checksums_in_zookeeper, true, "Use small format (dozens bytes) for part checksums in ZooKeeper instead of ordinary ones (dozens KB). 
Before enabling check that all replicas support new format.", 0) \ - M(Bool, use_minimalistic_part_header_in_zookeeper, true, "Store part header (checksums and columns) in a compact format and a single part znode instead of separate znodes (/columns and /checksums). This can dramatically reduce snapshot size in ZooKeeper. Before enabling check that all replicas support new format.", 0) \ - M(UInt64, finished_mutations_to_keep, 100, "How many records about mutations that are done to keep. If zero, then keep all of them.", 0) \ - M(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ - M(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ - M(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \ - M(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \ - M(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \ - M(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ - M(Bool, materialize_ttl_recalculate_only, false, "Only recalculate ttl info when MATERIALIZE TTL", 0) \ - M(Bool, enable_mixed_granularity_parts, true, "Enable parts with adaptive and non adaptive granularity", 0) \ - M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \ - M(UInt64, zero_copy_concurrent_part_removal_max_split_times, 5, "Max recursion depth for splitting independent Outdated parts ranges into smaller subranges (highly not recommended to change)", 0) \ - M(Float, zero_copy_concurrent_part_removal_max_postpone_ratio, static_cast(0.05), "Max percentage of top 
level parts to postpone removal in order to get smaller independent ranges (highly not recommended to change)", 0) \ - M(String, storage_policy, "default", "Name of storage disk policy", 0) \ - M(String, disk, "", "Name of storage disk. Can be specified instead of storage policy.", 0) \ - M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ - M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm.", 0) \ - M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ - M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \ - M(UInt64, max_concurrent_queries, 0, "Max number of concurrently executed queries related to the MergeTree table (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ - M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ - M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ - M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ - M(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. 
This settings must have the same value on all replicas", 0) \ - M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ - M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ - M(Bool, allow_floating_point_partition_key, false, "Allow floating point as partition key", 0) \ - M(UInt64, sleep_before_loading_outdated_parts_ms, 0, "For testing. Do not change it.", 0) \ - M(Bool, always_use_copy_instead_of_hardlinks, false, "Always copy data instead of hardlinking during mutations/replaces/detaches and so on.", 0) \ - M(Bool, disable_freeze_partition_for_zero_copy_replication, true, "Disable FREEZE PARTITION query for zero copy replication.", 0) \ - M(Bool, disable_detach_partition_for_zero_copy_replication, true, "Disable DETACH PARTITION query for zero copy replication.", 0) \ - M(Bool, disable_fetch_partition_for_zero_copy_replication, true, "Disable FETCH PARTITION query for zero copy replication.", 0) \ - M(Bool, enable_block_number_column, false, "Enable persisting column _block_number for each row.", 0) ALIAS(allow_experimental_block_number_column) \ - M(Bool, enable_block_offset_column, false, "Enable persisting column _block_offset for each row.", 0) \ + DECLARE(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ + DECLARE(Bool, compatibility_allow_sampling_expression_not_in_primary_key, false, "Allow to create a table with sampling expression not in primary key. 
This is needed only to temporarily allow to run the server with wrong tables for backward compatibility.", 0) \ + DECLARE(Bool, use_minimalistic_checksums_in_zookeeper, true, "Use small format (dozens bytes) for part checksums in ZooKeeper instead of ordinary ones (dozens KB). Before enabling check that all replicas support new format.", 0) \ + DECLARE(Bool, use_minimalistic_part_header_in_zookeeper, true, "Store part header (checksums and columns) in a compact format and a single part znode instead of separate znodes (/columns and /checksums). This can dramatically reduce snapshot size in ZooKeeper. Before enabling check that all replicas support new format.", 0) \ + DECLARE(UInt64, finished_mutations_to_keep, 100, "How many records about mutations that are done to keep. If zero, then keep all of them.", 0) \ + DECLARE(UInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024, "Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled).", 0) \ + DECLARE(UInt64, index_granularity_bytes, 10 * 1024 * 1024, "Approximate amount of bytes in single granule (0 - disabled).", 0) \ + DECLARE(UInt64, min_index_granularity_bytes, 1024, "Minimum amount of bytes in single granule.", 1024) \ + DECLARE(Int64, merge_with_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with delete TTL can be repeated.", 0) \ + DECLARE(Int64, merge_with_recompression_ttl_timeout, 3600 * 4, "Minimal time in seconds, when merge with recompression TTL can be repeated.", 0) \ + DECLARE(Bool, ttl_only_drop_parts, false, "Only drop altogether the expired parts and not partially prune them.", 0) \ + DECLARE(Bool, materialize_ttl_recalculate_only, false, "Only recalculate ttl info when MATERIALIZE TTL", 0) \ + DECLARE(Bool, enable_mixed_granularity_parts, true, "Enable parts with adaptive and non adaptive granularity", 0) \ + DECLARE(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive 
data parts is at least this.", 0) \ + DECLARE(UInt64, zero_copy_concurrent_part_removal_max_split_times, 5, "Max recursion depth for splitting independent Outdated parts ranges into smaller subranges (highly not recommended to change)", 0) \ + DECLARE(Float, zero_copy_concurrent_part_removal_max_postpone_ratio, static_cast(0.05), "Max percentage of top level parts to postpone removal in order to get smaller independent ranges (highly not recommended to change)", 0) \ + DECLARE(String, storage_policy, "default", "Name of storage disk policy", 0) \ + DECLARE(String, disk, "", "Name of storage disk. Can be specified instead of storage policy.", 0) \ + DECLARE(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \ + DECLARE(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm.", 0) \ + DECLARE(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ + DECLARE(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \ + DECLARE(UInt64, max_concurrent_queries, 0, "Max number of concurrently executed queries related to the MergeTree table (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ + DECLARE(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). 
Queries will still be limited by other max_concurrent_queries settings.", 0) \ + DECLARE(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ + DECLARE(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ + DECLARE(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ + DECLARE(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ + DECLARE(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ + DECLARE(Bool, allow_floating_point_partition_key, false, "Allow floating point as partition key", 0) \ + DECLARE(UInt64, sleep_before_loading_outdated_parts_ms, 0, "For testing. 
Do not change it.", 0) \ + DECLARE(Bool, always_use_copy_instead_of_hardlinks, false, "Always copy data instead of hardlinking during mutations/replaces/detaches and so on.", 0) \ + DECLARE(Bool, disable_freeze_partition_for_zero_copy_replication, true, "Disable FREEZE PARTITION query for zero copy replication.", 0) \ + DECLARE(Bool, disable_detach_partition_for_zero_copy_replication, true, "Disable DETACH PARTITION query for zero copy replication.", 0) \ + DECLARE(Bool, disable_fetch_partition_for_zero_copy_replication, true, "Disable FETCH PARTITION query for zero copy replication.", 0) \ + DECLARE(Bool, enable_block_number_column, false, "Enable persisting column _block_number for each row.", 0) ALIAS(allow_experimental_block_number_column) \ + DECLARE(Bool, enable_block_offset_column, false, "Enable persisting column _block_offset for each row.", 0) \ \ /** Experimental/work in progress feature. Unsafe for production. */ \ - M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. 
Does not take into account sharding expressions.", 0) \ - M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ - M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ - M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ - M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ - M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ - M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ - M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ + DECLARE(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. 
Does not take into account sharding expressions.", 0) \ + DECLARE(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ + DECLARE(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ + DECLARE(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ + DECLARE(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ + DECLARE(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ + DECLARE(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ + DECLARE(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ \ /** Compress marks and primary key. */ \ - M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ - M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ - M(String, marks_compression_codec, "ZSTD(3)", "Compression encoding used by marks, marks are small enough and cached, so the default compression is ZSTD(3).", 0) \ - M(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \ - M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \ - M(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ - M(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. 
This can save memory in the presence of a large number of tables.", 0) \ - M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ + DECLARE(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ + DECLARE(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ + DECLARE(String, marks_compression_codec, "ZSTD(3)", "Compression encoding used by marks, marks are small enough and cached, so the default compression is ZSTD(3).", 0) \ + DECLARE(String, primary_key_compression_codec, "ZSTD(3)", "Compression encoding used by primary, primary key is small enough and cached, so the default compression is ZSTD(3).", 0) \ + DECLARE(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \ + DECLARE(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ + DECLARE(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \ + DECLARE(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. 
*/ \ - M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts, or rebuild the projections.", 0) \ - M(DeduplicateMergeProjectionMode, deduplicate_merge_projection_mode, DeduplicateMergeProjectionMode::THROW, "Whether to allow create projection for the table with non-classic MergeTree. Ignore option is purely for compatibility which might result in incorrect answer. Otherwise, if allowed, what is the action when merge, drop or rebuild.", 0) \ + DECLARE(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ + DECLARE(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts, or rebuild the projections.", 0) \ + DECLARE(DeduplicateMergeProjectionMode, deduplicate_merge_projection_mode, DeduplicateMergeProjectionMode::THROW, "Whether to allow create projection for the table with non-classic MergeTree. Ignore option is purely for compatibility which might result in incorrect answer. 
Otherwise, if allowed, what is the action when merge, drop or rebuild.", 0) \ #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \ M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 54215cd2dba..6716144ce81 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -226,6 +226,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() future_mutated_part, task_context); + storage.writePartLog( + PartLogElement::MUTATE_PART_START, {}, 0, + entry.new_part_name, new_part, future_mutated_part->parts, merge_mutate_entry.get(), {}); + mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_mutated_part, metadata_snapshot, commands, merge_mutate_entry.get(), entry.create_time, task_context, NO_TRANSACTION_PTR, reserved_space, table_lock_holder); diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 53aef36404e..fbc20b282ca 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -39,6 +39,10 @@ void MutatePlainMergeTreeTask::prepare() future_part, task_context); + storage.writePartLog( + PartLogElement::MUTATE_PART_START, {}, 0, + future_part->name, new_part, future_part->parts, merge_list_entry.get(), {}); + stopwatch = std::make_unique(); write_part_log = [this] (const ExecutionStatus & execution_status) diff --git a/src/Storages/MySQL/MySQLHelpers.cpp b/src/Storages/MySQL/MySQLHelpers.cpp index e9ad18ee3ac..94b63673a85 100644 --- a/src/Storages/MySQL/MySQLHelpers.cpp +++ b/src/Storages/MySQL/MySQLHelpers.cpp @@ -7,6 +7,15 @@ namespace DB { +namespace MySQLSetting +{ + extern const MySQLSettingsUInt64 connection_max_tries; + extern const 
MySQLSettingsUInt64 connection_pool_size; + extern const MySQLSettingsUInt64 connection_wait_timeout; + extern const MySQLSettingsUInt64 connect_timeout; + extern const MySQLSettingsUInt64 read_write_timeout; +} + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -26,17 +35,17 @@ mysqlxx::PoolWithFailover createMySQLPoolWithFailover( const std::string & password, const MySQLSettings & mysql_settings) { - if (!mysql_settings.connection_pool_size) + if (!mysql_settings[MySQLSetting::connection_pool_size]) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Connection pool cannot have zero size"); return mysqlxx::PoolWithFailover( database, addresses, username, password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - static_cast(mysql_settings.connection_pool_size), - mysql_settings.connection_max_tries, - mysql_settings.connection_wait_timeout, - mysql_settings.connect_timeout, - mysql_settings.read_write_timeout); + static_cast(mysql_settings[MySQLSetting::connection_pool_size]), + mysql_settings[MySQLSetting::connection_max_tries], + mysql_settings[MySQLSetting::connection_wait_timeout], + mysql_settings[MySQLSetting::connect_timeout], + mysql_settings[MySQLSetting::read_write_timeout]); } } diff --git a/src/Storages/MySQL/MySQLSettings.cpp b/src/Storages/MySQL/MySQLSettings.cpp index ee0378a2403..392fa855dcb 100644 --- a/src/Storages/MySQL/MySQLSettings.cpp +++ b/src/Storages/MySQL/MySQLSettings.cpp @@ -1,12 +1,13 @@ -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -21,11 +22,51 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +#define LIST_OF_MYSQL_SETTINGS(DECLARE, ALIAS) \ + DECLARE(UInt64, connection_pool_size, 16, "Size of connection pool (if all connections are in use, the query will wait until some connection will be freed).", 0) \ + DECLARE(UInt64, connection_max_tries, 3, "Number of retries 
for pool with failover", 0) \ + DECLARE(UInt64, connection_wait_timeout, 5, "Timeout (in seconds) for waiting for free connection (in case of there is already connection_pool_size active connections), 0 - do not wait.", 0) \ + DECLARE(Bool, connection_auto_close, true, "Auto-close connection after query execution, i.e. disable connection reuse.", 0) \ + DECLARE(UInt64, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connect timeout (in seconds)", 0) \ + DECLARE(UInt64, read_write_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Read/write timeout (in seconds)", 0) \ + DECLARE(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ + +DECLARE_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) +struct MySQLSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) MySQLSettings##TYPE NAME = &MySQLSettingsImpl ::NAME; + +namespace MySQLSetting +{ +LIST_OF_MYSQL_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +MySQLSettings::MySQLSettings() : impl(std::make_unique()) +{ +} + +MySQLSettings::MySQLSettings(const MySQLSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +MySQLSettings::MySQLSettings(MySQLSettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +MySQLSettings::~MySQLSettings() = default; + +MYSQL_SETTINGS_SUPPORTED_TYPES(MySQLSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + + void MySQLSettings::loadFromQuery(const ASTSetQuery & settings_def) { - applyChanges(settings_def.changes); + 
impl->applyChanges(settings_def.changes); } void MySQLSettings::loadFromQuery(ASTStorage & storage_def) @@ -58,10 +99,10 @@ void MySQLSettings::loadFromQueryContext(ContextPtr context, ASTStorage & storag const Settings & settings = context->getQueryContext()->getSettingsRef(); - if (settings[Setting::mysql_datatypes_support_level].value != mysql_datatypes_support_level.value) + if (settings[Setting::mysql_datatypes_support_level].value != impl->mysql_datatypes_support_level.value) { static constexpr auto setting_name = "mysql_datatypes_support_level"; - set(setting_name, settings[Setting::mysql_datatypes_support_level].toString()); + impl->mysql_datatypes_support_level = settings[Setting::mysql_datatypes_support_level]; if (!storage_def.settings) { @@ -80,4 +121,21 @@ void MySQLSettings::loadFromQueryContext(ContextPtr context, ASTStorage & storag } } +std::vector MySQLSettings::getAllRegisteredNames() const +{ + std::vector all_settings; + for (const auto & setting_field : impl->all()) + all_settings.push_back(setting_field.getName()); + return all_settings; +} + +void MySQLSettings::loadFromNamedCollection(const NamedCollection & named_collection) +{ + for (const auto & setting : impl->all()) + { + const auto & setting_name = setting.getName(); + if (named_collection.has(setting_name)) + impl->set(setting_name, named_collection.get(setting_name)); + } +} } diff --git a/src/Storages/MySQL/MySQLSettings.h b/src/Storages/MySQL/MySQLSettings.h index a82bebd2506..02c79724188 100644 --- a/src/Storages/MySQL/MySQLSettings.h +++ b/src/Storages/MySQL/MySQLSettings.h @@ -1,10 +1,8 @@ #pragma once -#include -#include +#include #include -#include - +#include namespace Poco::Util { @@ -16,28 +14,40 @@ namespace DB { class ASTStorage; class ASTSetQuery; +class Context; +using ContextPtr = std::shared_ptr; +class NamedCollection; +struct MySQLSettingsImpl; -#define LIST_OF_MYSQL_SETTINGS(M, ALIAS) \ - M(UInt64, connection_pool_size, 16, "Size of connection pool (if all 
connections are in use, the query will wait until some connection will be freed).", 0) \ - M(UInt64, connection_max_tries, 3, "Number of retries for pool with failover", 0) \ - M(UInt64, connection_wait_timeout, 5, "Timeout (in seconds) for waiting for free connection (in case of there is already connection_pool_size active connections), 0 - do not wait.", 0) \ - M(Bool, connection_auto_close, true, "Auto-close connection after query execution, i.e. disable connection reuse.", 0) \ - M(UInt64, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connect timeout (in seconds)", 0) \ - M(UInt64, read_write_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Read/write timeout (in seconds)", 0) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ +/// List of available types supported in MySQLSettings object +#define MYSQL_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, MySQLDataTypesSupport) -DECLARE_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) +MYSQL_SETTINGS_SUPPORTED_TYPES(MySQLSettings, DECLARE_SETTING_TRAIT) -using MySQLBaseSettings = BaseSettings; - /** Settings for the MySQL family of engines. 
*/ -struct MySQLSettings : public MySQLBaseSettings +struct MySQLSettings { + MySQLSettings(); + MySQLSettings(const MySQLSettings & settings); + MySQLSettings(MySQLSettings && settings) noexcept; + ~MySQLSettings(); + + MYSQL_SETTINGS_SUPPORTED_TYPES(MySQLSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + + std::vector getAllRegisteredNames() const; + void loadFromQuery(ASTStorage & storage_def); void loadFromQuery(const ASTSetQuery & settings_def); void loadFromQueryContext(ContextPtr context, ASTStorage & storage_def); + void loadFromNamedCollection(const NamedCollection & named_collection); + +private: + std::unique_ptr impl; }; diff --git a/src/Storages/NATS/NATSSettings.cpp b/src/Storages/NATS/NATSSettings.cpp index c3174ccb9bb..eb58a2caca7 100644 --- a/src/Storages/NATS/NATSSettings.cpp +++ b/src/Storages/NATS/NATSSettings.cpp @@ -1,9 +1,12 @@ #include +#include +#include #include #include #include #include #include +#include namespace DB { @@ -13,15 +16,75 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } +#define NATS_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(String, nats_url, "", "A host-port to connect to NATS server.", 0) \ + DECLARE(String, nats_subjects, "", "List of subject for NATS table to subscribe/publish to.", 0) \ + DECLARE(String, nats_format, "", "The message format.", 0) \ + DECLARE(String, nats_schema, "", "Schema identifier (used by schema-based formats) for NATS engine", 0) \ + DECLARE(UInt64, nats_num_consumers, 1, "The number of consumer channels per table.", 0) \ + DECLARE(String, nats_queue_group, "", "Name for queue group of NATS subscribers.", 0) \ + DECLARE(Bool, nats_secure, false, "Use SSL connection", 0) \ + DECLARE(UInt64, nats_max_reconnect, 5, "Maximum amount of reconnection attempts.", 0) \ + DECLARE(UInt64, nats_reconnect_wait, 2000, "Amount of time in milliseconds to sleep between each reconnect attempt.", 0) \ + DECLARE(String, nats_server_list, "", "Server list for connection", 0) \ + DECLARE(UInt64, 
nats_skip_broken_messages, 0, "Skip at least this number of broken messages from NATS per block", 0) \ + DECLARE(UInt64, nats_max_block_size, 0, "Number of row collected before flushing data from NATS.", 0) \ + DECLARE(Milliseconds, nats_flush_interval_ms, 0, "Timeout for flushing data from NATS.", 0) \ + DECLARE(String, nats_username, "", "NATS username", 0) \ + DECLARE(String, nats_password, "", "NATS password", 0) \ + DECLARE(String, nats_token, "", "NATS token", 0) \ + DECLARE(String, nats_credential_file, "", "Path to a NATS credentials file", 0) \ + DECLARE(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \ + DECLARE(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ + DECLARE(StreamingHandleErrorMode, nats_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for NATS engine. Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ + +#define OBSOLETE_NATS_SETTINGS(M, ALIAS) \ + MAKE_OBSOLETE(M, Char, nats_row_delimiter, '\0') \ + +#define LIST_OF_NATS_SETTINGS(M, ALIAS) \ + NATS_RELATED_SETTINGS(M, ALIAS) \ + OBSOLETE_NATS_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ + +DECLARE_SETTINGS_TRAITS(NATSSettingsTraits, LIST_OF_NATS_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(NATSSettingsTraits, LIST_OF_NATS_SETTINGS) +struct NATSSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) NATSSettings##TYPE NAME = &NATSSettingsImpl ::NAME; + +namespace NATSSetting +{ +LIST_OF_NATS_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +NATSSettings::NATSSettings() : impl(std::make_unique()) +{ +} + +NATSSettings::NATSSettings(const NATSSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + 
+NATSSettings::NATSSettings(NATSSettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +NATSSettings::~NATSSettings() = default; + +NATS_SETTINGS_SUPPORTED_TYPES(NATSSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) + void NATSSettings::loadFromQuery(ASTStorage & storage_def) { if (storage_def.settings) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { @@ -37,4 +100,30 @@ void NATSSettings::loadFromQuery(ASTStorage & storage_def) storage_def.set(storage_def.settings, settings_ast); } } + +void NATSSettings::loadFromNamedCollection(const MutableNamedCollectionPtr & named_collection) +{ + for (const auto & setting : impl->all()) + { + const auto & setting_name = setting.getName(); + if (named_collection->has(setting_name)) + impl->set(setting_name, named_collection->get(setting_name)); + } +} + +SettingsChanges NATSSettings::getFormatSettings() const +{ + SettingsChanges values; + + for (const auto & setting : *impl) + { + const auto & setting_name = setting.getName(); + + /// check for non-nats-related settings + if (!setting_name.starts_with("nats_")) + values.emplace_back(setting_name, setting.getValue()); + } + + return values; +} } diff --git a/src/Storages/NATS/NATSSettings.h b/src/Storages/NATS/NATSSettings.h index bb756d38559..92d99cf6147 100644 --- a/src/Storages/NATS/NATSSettings.h +++ b/src/Storages/NATS/NATSSettings.h @@ -1,48 +1,61 @@ #pragma once -#include -#include +#include #include -#include +#include +#include +#include namespace DB { class ASTStorage; +struct NATSSettingsImpl; -#define NATS_RELATED_SETTINGS(M, ALIAS) \ - M(String, nats_url, "", "A host-port to connect to NATS server.", 0) \ - M(String, nats_subjects, "", "List of subject for NATS table to subscribe/publish to.", 0) \ - M(String, nats_format, "", "The message format.", 0) \ - M(String, nats_schema, "", "Schema identifier (used by schema-based formats) for NATS 
engine", 0) \ - M(UInt64, nats_num_consumers, 1, "The number of consumer channels per table.", 0) \ - M(String, nats_queue_group, "", "Name for queue group of NATS subscribers.", 0) \ - M(Bool, nats_secure, false, "Use SSL connection", 0) \ - M(UInt64, nats_max_reconnect, 5, "Maximum amount of reconnection attempts.", 0) \ - M(UInt64, nats_reconnect_wait, 2000, "Amount of time in milliseconds to sleep between each reconnect attempt.", 0) \ - M(String, nats_server_list, "", "Server list for connection", 0) \ - M(UInt64, nats_skip_broken_messages, 0, "Skip at least this number of broken messages from NATS per block", 0) \ - M(UInt64, nats_max_block_size, 0, "Number of row collected before flushing data from NATS.", 0) \ - M(Milliseconds, nats_flush_interval_ms, 0, "Timeout for flushing data from NATS.", 0) \ - M(String, nats_username, "", "NATS username", 0) \ - M(String, nats_password, "", "NATS password", 0) \ - M(String, nats_token, "", "NATS token", 0) \ - M(String, nats_credential_file, "", "Path to a NATS credentials file", 0) \ - M(UInt64, nats_startup_connect_tries, 5, "Number of connect tries at startup", 0) \ - M(UInt64, nats_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ - M(StreamingHandleErrorMode, nats_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for NATS engine. 
Possible values: default (throw an exception after nats_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ +/// List of available types supported in NATSSettings object +#define NATS_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, ArrowCompression) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, CapnProtoEnumComparingMode) \ + M(CLASS_NAME, Char) \ + M(CLASS_NAME, DateTimeInputFormat) \ + M(CLASS_NAME, DateTimeOutputFormat) \ + M(CLASS_NAME, DateTimeOverflowBehavior) \ + M(CLASS_NAME, Double) \ + M(CLASS_NAME, EscapingRule) \ + M(CLASS_NAME, Float) \ + M(CLASS_NAME, IdentifierQuotingRule) \ + M(CLASS_NAME, IdentifierQuotingStyle) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, IntervalOutputFormat) \ + M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, Milliseconds) \ + M(CLASS_NAME, ORCCompression) \ + M(CLASS_NAME, ParquetCompression) \ + M(CLASS_NAME, ParquetVersion) \ + M(CLASS_NAME, SchemaInferenceMode) \ + M(CLASS_NAME, StreamingHandleErrorMode) \ + M(CLASS_NAME, String) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, UInt64Auto) \ + M(CLASS_NAME, URI) -#define OBSOLETE_NATS_SETTINGS(M, ALIAS) \ - MAKE_OBSOLETE(M, Char, nats_row_delimiter, '\0') \ +NATS_SETTINGS_SUPPORTED_TYPES(NATSSettings, DECLARE_SETTING_TRAIT) -#define LIST_OF_NATS_SETTINGS(M, ALIAS) \ - NATS_RELATED_SETTINGS(M, ALIAS) \ - OBSOLETE_NATS_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) \ - -DECLARE_SETTINGS_TRAITS(NATSSettingsTraits, LIST_OF_NATS_SETTINGS) - -struct NATSSettings : public BaseSettings +struct NATSSettings { + NATSSettings(); + NATSSettings(const NATSSettings & settings); + NATSSettings(NATSSettings && settings) noexcept; + ~NATSSettings(); + + NATS_SETTINGS_SUPPORTED_TYPES(NATSSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); + void loadFromNamedCollection(const MutableNamedCollectionPtr & named_collection); + + SettingsChanges 
getFormatSettings() const; + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/NATS/NATSSource.h b/src/Storages/NATS/NATSSource.h index 91532442d36..591f20394d9 100644 --- a/src/Storages/NATS/NATSSource.h +++ b/src/Storages/NATS/NATSSource.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 01c6570d095..123f5adc22d 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,24 +9,24 @@ #include #include #include -#include -#include #include +#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include #include #include -#include -#include #include #include #include #include #include #include -#include #include @@ -40,6 +41,30 @@ namespace Setting extern const SettingsUInt64 output_format_avro_rows_in_file; } +namespace NATSSetting +{ + extern const NATSSettingsString nats_credential_file; + extern const NATSSettingsMilliseconds nats_flush_interval_ms; + extern const NATSSettingsString nats_format; + extern const NATSSettingsStreamingHandleErrorMode nats_handle_error_mode; + extern const NATSSettingsUInt64 nats_max_block_size; + extern const NATSSettingsUInt64 nats_max_reconnect; + extern const NATSSettingsUInt64 nats_max_rows_per_message; + extern const NATSSettingsUInt64 nats_num_consumers; + extern const NATSSettingsString nats_password; + extern const NATSSettingsString nats_queue_group; + extern const NATSSettingsUInt64 nats_reconnect_wait; + extern const NATSSettingsString nats_schema; + extern const NATSSettingsBool nats_secure; + extern const NATSSettingsString nats_server_list; + extern const NATSSettingsUInt64 nats_skip_broken_messages; + extern const NATSSettingsUInt64 nats_startup_connect_tries; + extern const NATSSettingsString nats_subjects; + extern const NATSSettingsString nats_token; + extern const 
NATSSettingsString nats_url; + extern const NATSSettingsString nats_username; +} + static const uint32_t QUEUE_SIZE = 100000; static const auto RESCHEDULE_MS = 500; static const auto MAX_THREAD_WORK_DURATION_MS = 60000; @@ -64,32 +89,32 @@ StorageNATS::StorageNATS( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , nats_settings(std::move(nats_settings_)) - , subjects(parseList(getContext()->getMacros()->expand(nats_settings->nats_subjects), ',')) - , format_name(getContext()->getMacros()->expand(nats_settings->nats_format)) - , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) - , num_consumers(nats_settings->nats_num_consumers.value) - , max_rows_per_message(nats_settings->nats_max_rows_per_message) + , subjects(parseList(getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_subjects]), ',')) + , format_name(getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_format])) + , schema_name(getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_schema])) + , num_consumers((*nats_settings)[NATSSetting::nats_num_consumers].value) + , max_rows_per_message((*nats_settings)[NATSSetting::nats_max_rows_per_message]) , log(getLogger("StorageNATS (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , throw_on_startup_failure(mode <= LoadingStrictnessLevel::CREATE) { - auto nats_username = getContext()->getMacros()->expand(nats_settings->nats_username); - auto nats_password = getContext()->getMacros()->expand(nats_settings->nats_password); - auto nats_token = getContext()->getMacros()->expand(nats_settings->nats_token); - auto nats_credential_file = getContext()->getMacros()->expand(nats_settings->nats_credential_file); + auto nats_username = getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_username]); + auto nats_password = 
getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_password]); + auto nats_token = getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_token]); + auto nats_credential_file = getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_credential_file]); configuration = { - .url = getContext()->getMacros()->expand(nats_settings->nats_url), - .servers = parseList(getContext()->getMacros()->expand(nats_settings->nats_server_list), ','), + .url = getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_url]), + .servers = parseList(getContext()->getMacros()->expand((*nats_settings)[NATSSetting::nats_server_list]), ','), .username = nats_username.empty() ? getContext()->getConfigRef().getString("nats.user", "") : nats_username, .password = nats_password.empty() ? getContext()->getConfigRef().getString("nats.password", "") : nats_password, .token = nats_token.empty() ? getContext()->getConfigRef().getString("nats.token", "") : nats_token, .credential_file = nats_credential_file.empty() ? 
getContext()->getConfigRef().getString("nats.credential_file", "") : nats_credential_file, - .max_reconnect = static_cast(nats_settings->nats_max_reconnect.value), - .reconnect_wait = static_cast(nats_settings->nats_reconnect_wait.value), - .secure = nats_settings->nats_secure.value + .max_reconnect = static_cast((*nats_settings)[NATSSetting::nats_max_reconnect].value), + .reconnect_wait = static_cast((*nats_settings)[NATSSetting::nats_reconnect_wait].value), + .secure = (*nats_settings)[NATSSetting::nats_secure].value }; if (configuration.secure) @@ -99,14 +124,14 @@ StorageNATS::StorageNATS( storage_metadata.setColumns(columns_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode)); + setVirtuals(createVirtuals((*nats_settings)[NATSSetting::nats_handle_error_mode])); nats_context = addSettings(getContext()); nats_context->makeQueryContext(); try { - size_t num_tries = nats_settings->nats_startup_connect_tries; + size_t num_tries = (*nats_settings)[NATSSetting::nats_startup_connect_tries]; for (size_t i = 0; i < num_tries; ++i) { connection = std::make_shared(configuration, log); @@ -143,6 +168,8 @@ StorageNATS::StorageNATS( connection_task->deactivate(); } +StorageNATS::~StorageNATS() = default; + VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode) { VirtualColumnsDescription desc; @@ -183,8 +210,8 @@ ContextMutablePtr StorageNATS::addSettings(ContextPtr local_context) const auto modified_context = Context::createCopy(local_context); modified_context->setSetting("input_format_skip_unknown_fields", true); modified_context->setSetting("input_format_allow_errors_ratio", 0.); - if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::DEFAULT) - modified_context->setSetting("input_format_allow_errors_num", nats_settings->nats_skip_broken_messages.value); + if ((*nats_settings)[NATSSetting::nats_handle_error_mode] == 
StreamingHandleErrorMode::DEFAULT) + modified_context->setSetting("input_format_allow_errors_num", (*nats_settings)[NATSSetting::nats_skip_broken_messages].value); else modified_context->setSetting("input_format_allow_errors_num", Field{0}); @@ -194,14 +221,8 @@ ContextMutablePtr StorageNATS::addSettings(ContextPtr local_context) const if (!schema_name.empty()) modified_context->setSetting("format_schema", schema_name); - for (const auto & setting : *nats_settings) - { - const auto & setting_name = setting.getName(); - - /// check for non-nats-related settings - if (!setting_name.starts_with("nats_")) - modified_context->setSetting(setting_name, setting.getValue()); - } + /// check for non-nats-related settings + modified_context->applySettingsChanges(nats_settings->getFormatSettings()); return modified_context; } @@ -306,7 +327,7 @@ void StorageNATS::deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool size_t StorageNATS::getMaxBlockSize() const { - return nats_settings->nats_max_block_size.changed ? nats_settings->nats_max_block_size.value + return (*nats_settings)[NATSSetting::nats_max_block_size].changed ? (*nats_settings)[NATSSetting::nats_max_block_size].value : (getContext()->getSettingsRef()[Setting::max_insert_block_size].value / num_consumers); } @@ -350,7 +371,7 @@ void StorageNATS::read( for (size_t i = 0; i < num_created_consumers; ++i) { - auto nats_source = std::make_shared(*this, storage_snapshot, modified_context, column_names, 1, nats_settings->nats_handle_error_mode); + auto nats_source = std::make_shared(*this, storage_snapshot, modified_context, column_names, 1, (*nats_settings)[NATSSetting::nats_handle_error_mode]); auto converting_dag = ActionsDAG::makeConvertingActions( nats_source->getPort().getHeader().getColumnsWithTypeAndName(), @@ -512,7 +533,7 @@ NATSConsumerPtr StorageNATS::createConsumer() { return std::make_shared( connection, *this, subjects, - nats_settings->nats_queue_group.changed ? 
nats_settings->nats_queue_group.value : getStorageID().getFullTableName(), + (*nats_settings)[NATSSetting::nats_queue_group].changed ? (*nats_settings)[NATSSetting::nats_queue_group].value : getStorageID().getFullTableName(), log, queue_size, shutdown_called); } @@ -676,12 +697,12 @@ bool StorageNATS::streamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { LOG_DEBUG(log, "Current queue size: {}", consumers[0]->queueSize()); - auto source = std::make_shared(*this, storage_snapshot, nats_context, column_names, block_size, nats_settings->nats_handle_error_mode); + auto source = std::make_shared(*this, storage_snapshot, nats_context, column_names, block_size, (*nats_settings)[NATSSetting::nats_handle_error_mode]); sources.emplace_back(source); pipes.emplace_back(source); - Poco::Timespan max_execution_time = nats_settings->nats_flush_interval_ms.changed - ? nats_settings->nats_flush_interval_ms + Poco::Timespan max_execution_time = (*nats_settings)[NATSSetting::nats_flush_interval_ms].changed + ? 
(*nats_settings)[NATSSetting::nats_flush_interval_ms] : getContext()->getSettingsRef()[Setting::stream_flush_interval_ms]; source->setTimeLimit(max_execution_time); @@ -746,25 +767,20 @@ void registerStorageNATS(StorageFactory & factory) auto nats_settings = std::make_unique(); if (auto named_collection = tryGetNamedCollectionWithOverrides(args.engine_args, args.getLocalContext())) { - for (const auto & setting : nats_settings->all()) - { - const auto & setting_name = setting.getName(); - if (named_collection->has(setting_name)) - nats_settings->set(setting_name, named_collection->get(setting_name)); - } + nats_settings->loadFromNamedCollection(named_collection); } else if (!args.storage_def->settings) throw Exception(ErrorCodes::BAD_ARGUMENTS, "NATS engine must have settings"); nats_settings->loadFromQuery(*args.storage_def); - if (!nats_settings->nats_url.changed && !nats_settings->nats_server_list.changed) + if (!(*nats_settings)[NATSSetting::nats_url].changed && !(*nats_settings)[NATSSetting::nats_server_list].changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify either `nats_url` or `nats_server_list` settings"); - if (!nats_settings->nats_format.changed) + if (!(*nats_settings)[NATSSetting::nats_format].changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_format` setting"); - if (!nats_settings->nats_subjects.changed) + if (!(*nats_settings)[NATSSetting::nats_subjects].changed) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); return std::make_shared(args.table_id, args.getContext(), args.columns, args.comment, std::move(nats_settings), args.mode); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 5fca8cb0163..fe5eb6ce71d 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include 
#include @@ -15,6 +15,7 @@ namespace DB class NATSConsumer; using NATSConsumerPtr = std::shared_ptr; +struct NATSSettings; class StorageNATS final : public IStorage, WithContext { @@ -27,6 +28,8 @@ public: std::unique_ptr nats_settings_, LoadingStrictnessLevel mode); + ~StorageNATS() override; + std::string getName() const override { return "NATS"; } bool noPushingToViews() const override { return true; } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index c3adc86b124..21db81802c7 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -20,6 +20,7 @@ class StorageAzureConfiguration : public StorageObjectStorage::Configuration public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type = ObjectStorageType::Azure; static constexpr auto type_name = "azure"; static constexpr auto engine_name = "Azure"; /// All possible signatures for Azure engine with structure argument (for example for azureBlobStorage table function). 
@@ -49,6 +50,7 @@ public: StorageAzureConfiguration() = default; StorageAzureConfiguration(const StorageAzureConfiguration & other); + ObjectStorageType getType() const override { return type; } std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return engine_name; } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 206147d7e5e..90997292693 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -14,6 +14,7 @@ class StorageHDFSConfiguration : public StorageObjectStorage::Configuration public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type = ObjectStorageType::HDFS; static constexpr auto type_name = "hdfs"; static constexpr auto engine_name = "HDFS"; /// All possible signatures for HDFS engine with structure argument (for example for hdfs table function). @@ -34,6 +35,7 @@ public: StorageHDFSConfiguration() = default; StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + ObjectStorageType getType() const override { return type; } std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return engine_name; } diff --git a/src/Storages/ObjectStorage/Local/Configuration.h b/src/Storages/ObjectStorage/Local/Configuration.h index 84dc3855df3..32a095bf7de 100644 --- a/src/Storages/ObjectStorage/Local/Configuration.h +++ b/src/Storages/ObjectStorage/Local/Configuration.h @@ -18,6 +18,7 @@ class StorageLocalConfiguration : public StorageObjectStorage::Configuration public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type = ObjectStorageType::Local; static constexpr auto type_name = "local"; /// All possible signatures for Local engine with structure argument (for example for local table function). 
static constexpr auto max_number_of_arguments_with_structure = 4; @@ -37,6 +38,7 @@ public: StorageLocalConfiguration() = default; StorageLocalConfiguration(const StorageLocalConfiguration & other) = default; + ObjectStorageType getType() const override { return type; } std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return "Local"; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index b36df67fb0f..f08765367fa 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -14,6 +14,7 @@ class StorageS3Configuration : public StorageObjectStorage::Configuration public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type = ObjectStorageType::S3; static constexpr auto type_name = "s3"; static constexpr auto namespace_name = "bucket"; /// All possible signatures for S3 storage with structure argument (for example for s3 table function). @@ -57,6 +58,7 @@ public: StorageS3Configuration() = default; StorageS3Configuration(const StorageS3Configuration & other); + ObjectStorageType getType() const override { return type; } std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return url.storage_name; } std::string getNamespaceType() const override { return namespace_name; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index f39586c23b4..3f90586c4f3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -163,6 +163,7 @@ public: bool with_table_structure); /// Storage type: s3, hdfs, azure, local. + virtual ObjectStorageType getType() const = 0; virtual std::string getTypeName() const = 0; /// Engine name: S3, HDFS, Azure. 
virtual std::string getEngineName() const = 0; diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp index cb5f909b004..d47e7b97404 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp @@ -4,9 +4,10 @@ #include #include #include +#include +#include #include - namespace DB { @@ -15,29 +16,29 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ - M(ObjectStorageQueueMode, mode, ObjectStorageQueueMode::ORDERED, \ - "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKepeer." \ +#define OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(ObjectStorageQueueMode, mode, ObjectStorageQueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeepeer." \ "With ordered mode, only the max name of the successfully consumed file stored.", \ 0) \ - M(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep file in after successful processing", 0) \ - M(String, keeper_path, "", "Zookeeper node path", 0) \ - M(UInt32, loading_retries, 10, "Retry loading up to specified number of times", 0) \ - M(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \ - M(UInt32, enable_logging_to_queue_log, 1, "Enable logging to system table system.(s3/azure_)queue_log", 0) \ - M(String, last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ - M(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ - M(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ - M(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ - M(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \ - M(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ - M(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ - M(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ - M(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ - M(UInt32, max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \ - M(UInt32, max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \ - M(UInt32, max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \ - M(UInt32, max_processing_time_sec_before_commit, 0, "Timeout in seconds after which to commit files committed to keeper", 0) \ + DECLARE(ObjectStorageQueueAction, after_processing, ObjectStorageQueueAction::KEEP, "Delete or keep file in after successful processing", 0) \ + DECLARE(String, keeper_path, "", "Zookeeper node path", 0) \ + DECLARE(UInt32, loading_retries, 10, "Retry loading up to specified number of times", 0) \ + DECLARE(UInt32, processing_threads_num, 1, "Number of processing threads", 0) \ + DECLARE(UInt32, enable_logging_to_queue_log, 1, "Enable logging to system table system.(s3/azure_)queue_log", 0) \ + DECLARE(String, last_processed_path, "", "For Ordered mode. 
Files that have lexicographically smaller file name are considered already processed", 0) \ + DECLARE(UInt32, tracked_file_ttl_sec, 0, "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", 0) \ + DECLARE(UInt32, polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + DECLARE(UInt32, polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + DECLARE(UInt32, polling_backoff_ms, 1000, "Polling backoff", 0) \ + DECLARE(UInt32, tracked_files_limit, 1000, "For unordered mode. Max set size for tracking processed files in ZooKeeper", 0) \ + DECLARE(UInt32, cleanup_interval_min_ms, 60000, "For unordered mode. Polling backoff min for cleanup", 0) \ + DECLARE(UInt32, cleanup_interval_max_ms, 60000, "For unordered mode. Polling backoff max for cleanup", 0) \ + DECLARE(UInt32, buckets, 0, "Number of buckets for Ordered mode parallel processing", 0) \ + DECLARE(UInt64, max_processed_files_before_commit, 100, "Number of files which can be processed before being committed to keeper", 0) \ + DECLARE(UInt64, max_processed_rows_before_commit, 0, "Number of rows which can be processed before being committed to keeper", 0) \ + DECLARE(UInt64, max_processed_bytes_before_commit, 0, "Number of bytes which can be processed before being committed to keeper", 0) \ + DECLARE(UInt64, max_processing_time_sec_before_commit, 0, "Timeout in seconds after which to commit files committed to keeper", 0) \ #define LIST_OF_OBJECT_STORAGE_QUEUE_SETTINGS(M, ALIAS) \ OBJECT_STORAGE_QUEUE_RELATED_SETTINGS(M, ALIAS) \ @@ -74,6 +75,38 @@ ObjectStorageQueueSettings::ObjectStorageQueueSettings(ObjectStorageQueueSetting { } +void ObjectStorageQueueSettings::dumpToSystemEngineSettingsColumns( + MutableColumnsAndConstraints & params, + const std::string & table_name, + const std::string & database_name, + const StorageObjectStorageQueue & storage) const +{ + MutableColumns & res_columns = params.res_columns; + + /// We cannot use 
setting.isValueChanged(), because we do not store initial settings in storage. + /// Therefore check if the setting was changed via table metadata. + const auto & settings_changes = storage.getInMemoryMetadataPtr()->settings_changes->as()->changes; + auto is_changed = [&](const std::string & setting_name) -> bool + { + return settings_changes.end() != std::find_if( + settings_changes.begin(), settings_changes.end(), + [&](const SettingChange & change){ return change.name == setting_name; }); + }; + + for (const auto & change : impl->all()) + { + size_t i = 0; + res_columns[i++]->insert(database_name); + res_columns[i++]->insert(table_name); + res_columns[i++]->insert(change.getName()); + res_columns[i++]->insert(convertFieldToString(change.getValue())); + res_columns[i++]->insert(change.getTypeName()); + res_columns[i++]->insert(is_changed(change.getName())); + res_columns[i++]->insert(change.getDescription()); + res_columns[i++]->insert(false); + } +} + ObjectStorageQueueSettings::~ObjectStorageQueueSettings() = default; OBJECT_STORAGE_QUEUE_SETTINGS_SUPPORTED_TYPES(ObjectStorageQueueSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h index 37c65dee0ca..c2929ac27fb 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -10,6 +10,8 @@ namespace DB { class ASTStorage; struct ObjectStorageQueueSettingsImpl; +struct MutableColumnsAndConstraints; +class StorageObjectStorageQueue; /// List of available types supported in ObjectStorageQueueSettings object #define OBJECT_STORAGE_QUEUE_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ @@ -51,6 +53,12 @@ struct ObjectStorageQueueSettings OBJECT_STORAGE_QUEUE_SETTINGS_SUPPORTED_TYPES(ObjectStorageQueueSettings, 
DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void dumpToSystemEngineSettingsColumns( + MutableColumnsAndConstraints & params, + const std::string & table_name, + const std::string & database_name, + const StorageObjectStorageQueue & storage) const; + void loadFromQuery(ASTStorage & storage_def); private: diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 26966f9cbd2..c55287d2177 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -26,10 +26,10 @@ namespace Setting namespace ObjectStorageQueueSetting { extern const ObjectStorageQueueSettingsObjectStorageQueueAction after_processing; - extern const ObjectStorageQueueSettingsUInt32 max_processed_bytes_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processed_files_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processed_rows_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processing_time_sec_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_bytes_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_files_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_rows_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processing_time_sec_before_commit; } namespace ErrorCodes diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h index 433c6b26fe7..a4edd8831c1 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueTableMetadata.h @@ -23,13 +23,13 @@ struct ObjectStorageQueueTableMetadata const String columns; const String after_processing; const String mode; - const UInt64 tracked_files_limit; - const UInt64 tracked_files_ttl_sec; - 
const UInt64 buckets; + const UInt32 tracked_files_limit; + const UInt32 tracked_files_ttl_sec; + const UInt32 buckets; const String last_processed_path; - const UInt64 loading_retries; + const UInt32 loading_retries; - UInt64 processing_threads_num; /// Can be changed from keeper. + UInt32 processing_threads_num; /// Can be changed from keeper. bool processing_threads_num_changed = false; ObjectStorageQueueTableMetadata( diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 5da5d56bf3b..245b441513d 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -47,14 +47,20 @@ namespace ObjectStorageQueueSetting extern const ObjectStorageQueueSettingsUInt32 enable_logging_to_queue_log; extern const ObjectStorageQueueSettingsString keeper_path; extern const ObjectStorageQueueSettingsObjectStorageQueueMode mode; - extern const ObjectStorageQueueSettingsUInt32 max_processed_bytes_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processed_files_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processed_rows_before_commit; - extern const ObjectStorageQueueSettingsUInt32 max_processing_time_sec_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_bytes_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_files_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processed_rows_before_commit; + extern const ObjectStorageQueueSettingsUInt64 max_processing_time_sec_before_commit; extern const ObjectStorageQueueSettingsUInt32 polling_min_timeout_ms; extern const ObjectStorageQueueSettingsUInt32 polling_max_timeout_ms; extern const ObjectStorageQueueSettingsUInt32 polling_backoff_ms; extern const ObjectStorageQueueSettingsUInt32 processing_threads_num; + extern const ObjectStorageQueueSettingsUInt32 buckets; 
+ extern const ObjectStorageQueueSettingsUInt32 tracked_file_ttl_sec; + extern const ObjectStorageQueueSettingsUInt32 tracked_files_limit; + extern const ObjectStorageQueueSettingsString last_processed_path; + extern const ObjectStorageQueueSettingsUInt32 loading_retries; + extern const ObjectStorageQueueSettingsObjectStorageQueueAction after_processing; } namespace ErrorCodes @@ -145,10 +151,12 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * /* engine_args */, + ASTStorage * engine_args, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) + , type(configuration_->getType()) + , engine_name(engine_args->engine->name) , zk_path(chooseZooKeeperPath(table_id_, context_->getSettingsRef(), *queue_settings_)) , enable_logging_to_queue_log((*queue_settings_)[ObjectStorageQueueSetting::enable_logging_to_queue_log]) , polling_min_timeout_ms((*queue_settings_)[ObjectStorageQueueSetting::polling_min_timeout_ms]) @@ -194,6 +202,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); + storage_metadata.settings_changes = engine_args->settings->ptr(); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_)); setInMemoryMetadata(storage_metadata); @@ -560,4 +569,31 @@ std::shared_ptr StorageObjectStorageQue return std::make_shared(files_metadata, std::move(glob_iterator), shutdown_called, log); } +ObjectStorageQueueSettings StorageObjectStorageQueue::getSettings() const +{ + /// We do not store queue settings + /// (because of the inconvenience of keeping them in sync with ObjectStorageQueueTableMetadata), + /// so let's reconstruct. 
+ ObjectStorageQueueSettings settings; + const auto & table_metadata = getTableMetadata(); + settings[ObjectStorageQueueSetting::after_processing] = table_metadata.after_processing; + settings[ObjectStorageQueueSetting::keeper_path] = zk_path; + settings[ObjectStorageQueueSetting::loading_retries] = table_metadata.loading_retries; + settings[ObjectStorageQueueSetting::processing_threads_num] = table_metadata.processing_threads_num; + settings[ObjectStorageQueueSetting::enable_logging_to_queue_log] = enable_logging_to_queue_log; + settings[ObjectStorageQueueSetting::last_processed_path] = table_metadata.last_processed_path; + settings[ObjectStorageQueueSetting::tracked_file_ttl_sec] = 0; + settings[ObjectStorageQueueSetting::tracked_files_limit] = 0; + settings[ObjectStorageQueueSetting::polling_min_timeout_ms] = polling_min_timeout_ms; + settings[ObjectStorageQueueSetting::polling_max_timeout_ms] = polling_max_timeout_ms; + settings[ObjectStorageQueueSetting::polling_backoff_ms] = polling_backoff_ms; + settings[ObjectStorageQueueSetting::cleanup_interval_min_ms] = 0; + settings[ObjectStorageQueueSetting::cleanup_interval_max_ms] = 0; + settings[ObjectStorageQueueSetting::buckets] = table_metadata.buckets; + settings[ObjectStorageQueueSetting::max_processed_files_before_commit] = commit_settings.max_processed_files_before_commit; + settings[ObjectStorageQueueSetting::max_processed_rows_before_commit] = commit_settings.max_processed_rows_before_commit; + settings[ObjectStorageQueueSetting::max_processed_bytes_before_commit] = commit_settings.max_processed_bytes_before_commit; + return settings; +} + } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h index 68186074007..04b0a16834d 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.h @@ -7,6 +7,7 @@ #include #include #include +#include #include 
#include @@ -33,7 +34,9 @@ public: ASTStorage * engine_args, LoadingStrictnessLevel mode); - String getName() const override { return "ObjectStorageQueue"; } + String getName() const override { return engine_name; } + + ObjectStorageType getType() { return type; } void read( QueryPlan & query_plan, @@ -51,16 +54,20 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; + ObjectStorageQueueSettings getSettings() const; + private: friend class ReadFromObjectStorageQueue; using FileIterator = ObjectStorageQueueSource::FileIterator; using CommitSettings = ObjectStorageQueueSource::CommitSettings; + ObjectStorageType type; + const std::string engine_name; const fs::path zk_path; const bool enable_logging_to_queue_log; - const size_t polling_min_timeout_ms; - const size_t polling_max_timeout_ms; - const size_t polling_backoff_ms; + const UInt32 polling_min_timeout_ms; + const UInt32 polling_max_timeout_ms; + const UInt32 polling_backoff_ms; const CommitSettings commit_settings; std::shared_ptr files_metadata; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp index 3e067a9320e..d6824c43ac9 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp @@ -18,20 +18,20 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -#define LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS(M, ALIAS) \ - M(UInt64, materialized_postgresql_max_block_size, 65536, "Number of row collected before flushing data into table.", 0) \ - M(String, materialized_postgresql_tables_list, "", "List of tables for MaterializedPostgreSQL database engine", 0) \ - M(String, materialized_postgresql_schema_list, "", "List of schemas for MaterializedPostgreSQL database engine", 0) \ - M(String, materialized_postgresql_replication_slot, "", "A user-created replication slot", 0) \ - M(String, materialized_postgresql_snapshot, "", "User provided snapshot in 
case he manages replication slots himself", 0) \ - M(String, materialized_postgresql_schema, "", "PostgreSQL schema", 0) \ - M(Bool, materialized_postgresql_tables_list_with_schema, false, \ +#define LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS(DECLARE, ALIAS) \ + DECLARE(UInt64, materialized_postgresql_max_block_size, 65536, "Number of row collected before flushing data into table.", 0) \ + DECLARE(String, materialized_postgresql_tables_list, "", "List of tables for MaterializedPostgreSQL database engine", 0) \ + DECLARE(String, materialized_postgresql_schema_list, "", "List of schemas for MaterializedPostgreSQL database engine", 0) \ + DECLARE(String, materialized_postgresql_replication_slot, "", "A user-created replication slot", 0) \ + DECLARE(String, materialized_postgresql_snapshot, "", "User provided snapshot in case he manages replication slots himself", 0) \ + DECLARE(String, materialized_postgresql_schema, "", "PostgreSQL schema", 0) \ + DECLARE(Bool, materialized_postgresql_tables_list_with_schema, false, \ "Consider by default that if there is a dot in tables list 'name.name', " \ "then the first name is postgres schema and second is postgres table. 
This setting is needed to allow table names with dots", 0) \ - M(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \ - M(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \ - M(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \ - M(Bool, materialized_postgresql_use_unique_replication_consumer_identifier, false, "Should a unique consumer be registered for table replication", 0) \ + DECLARE(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \ + DECLARE(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \ + DECLARE(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \ + DECLARE(Bool, materialized_postgresql_use_unique_replication_consumer_identifier, false, "Should a unique consumer be registered for table replication", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp index f53e6c1feb1..3921f19911b 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -16,34 +16,34 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -#define RABBITMQ_RELATED_SETTINGS(M, ALIAS) \ - M(String, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ - M(String, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ - M(String, rabbitmq_format, "", "The message format.", 0) \ - M(String, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ - M(String, rabbitmq_routing_key_list, "5672", "A string of routing keys, separated by 
dots.", 0) \ - M(String, rabbitmq_schema, "", "Schema identifier (used by schema-based formats) for RabbitMQ engine", 0) \ - M(UInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ - M(UInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ - M(String, rabbitmq_queue_base, "", "Base for queue names to be able to reopen non-empty queues in case of failure.", 0) \ - M(Bool, rabbitmq_persistent, false, "For insert query messages will be made 'persistent', durable.", 0) \ - M(Bool, rabbitmq_secure, false, "Use SSL connection", 0) \ - M(String, rabbitmq_address, "", "Address for connection", 0) \ - M(UInt64, rabbitmq_skip_broken_messages, 0, "Skip at least this number of broken messages from RabbitMQ per block", 0) \ - M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ - M(UInt64, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ - M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) \ - M(String, rabbitmq_queue_settings_list, "", "A list of rabbitmq queue settings", 0) \ - M(UInt64, rabbitmq_empty_queue_backoff_start_ms, 10, "A minimum backoff point to reschedule read if the rabbitmq queue is empty", 0) \ - M(UInt64, rabbitmq_empty_queue_backoff_end_ms, 10000, "A maximum backoff point to reschedule read if the rabbitmq queue is empty", 0) \ - M(UInt64, rabbitmq_empty_queue_backoff_step_ms, 100, "A backoff step to reschedule read if the rabbitmq queue is empty", 0) \ - M(Bool, rabbitmq_queue_consume, false, "Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings", 0) \ - M(String, rabbitmq_username, "", "RabbitMQ username", 0) \ - M(String, rabbitmq_password, "", "RabbitMQ password", 0) \ - M(Bool, reject_unhandled_messages, false, "Allow messages to be rejected in case they cannot be processed. 
This also automatically implies if there is a x-deadletter-exchange queue setting added", 0) \ - M(Bool, rabbitmq_commit_on_select, false, "Commit messages when select query is made", 0) \ - M(UInt64, rabbitmq_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ - M(StreamingHandleErrorMode, rabbitmq_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for RabbitMQ engine. Possible values: default (throw an exception after rabbitmq_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ +#define RABBITMQ_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(String, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ + DECLARE(String, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ + DECLARE(String, rabbitmq_format, "", "The message format.", 0) \ + DECLARE(String, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ + DECLARE(String, rabbitmq_routing_key_list, "5672", "A string of routing keys, separated by dots.", 0) \ + DECLARE(String, rabbitmq_schema, "", "Schema identifier (used by schema-based formats) for RabbitMQ engine", 0) \ + DECLARE(UInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ + DECLARE(UInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ + DECLARE(String, rabbitmq_queue_base, "", "Base for queue names to be able to reopen non-empty queues in case of failure.", 0) \ + DECLARE(Bool, rabbitmq_persistent, false, "For insert query messages will be made 'persistent', durable.", 0) \ + DECLARE(Bool, rabbitmq_secure, false, "Use SSL connection", 0) \ + DECLARE(String, rabbitmq_address, "", "Address for connection", 0) \ + DECLARE(UInt64, rabbitmq_skip_broken_messages, 0, "Skip at least this number of broken messages from RabbitMQ per block", 0) \ + DECLARE(UInt64, 
rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ + DECLARE(UInt64, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ + DECLARE(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) \ + DECLARE(String, rabbitmq_queue_settings_list, "", "A list of rabbitmq queue settings", 0) \ + DECLARE(UInt64, rabbitmq_empty_queue_backoff_start_ms, 10, "A minimum backoff point to reschedule read if the rabbitmq queue is empty", 0) \ + DECLARE(UInt64, rabbitmq_empty_queue_backoff_end_ms, 10000, "A maximum backoff point to reschedule read if the rabbitmq queue is empty", 0) \ + DECLARE(UInt64, rabbitmq_empty_queue_backoff_step_ms, 100, "A backoff step to reschedule read if the rabbitmq queue is empty", 0) \ + DECLARE(Bool, rabbitmq_queue_consume, false, "Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings", 0) \ + DECLARE(String, rabbitmq_username, "", "RabbitMQ username", 0) \ + DECLARE(String, rabbitmq_password, "", "RabbitMQ password", 0) \ + DECLARE(Bool, reject_unhandled_messages, false, "Allow messages to be rejected in case they cannot be processed. This also automatically implies if there is a x-deadletter-exchange queue setting added", 0) \ + DECLARE(Bool, rabbitmq_commit_on_select, false, "Commit messages when select query is made", 0) \ + DECLARE(UInt64, rabbitmq_max_rows_per_message, 1, "The maximum number of rows produced in one message for row-based formats.", 0) \ + DECLARE(StreamingHandleErrorMode, rabbitmq_handle_error_mode, StreamingHandleErrorMode::DEFAULT, "How to handle errors for RabbitMQ engine. 
Possible values: default (throw an exception after rabbitmq_skip_broken_messages broken messages), stream (save broken messages and errors in virtual columns _raw_message, _error).", 0) \ #define OBSOLETE_RABBITMQ_SETTINGS(M, ALIAS) \ MAKE_OBSOLETE(M, Char, rabbitmq_row_delimiter, '\0') \ diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index 54a9f52de6d..936064ee472 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -1,8 +1,9 @@ #pragma once +#include #include -#include #include +#include namespace DB diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index e80c9d2a0f5..a87ecc305f6 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Storages/RocksDB/RocksDBSettings.cpp b/src/Storages/RocksDB/RocksDBSettings.cpp index fef15660cc8..d067e516eb1 100644 --- a/src/Storages/RocksDB/RocksDBSettings.cpp +++ b/src/Storages/RocksDB/RocksDBSettings.cpp @@ -14,9 +14,9 @@ namespace ErrorCodes /** StorageEmbeddedRocksdb table settings */ -#define LIST_OF_ROCKSDB_SETTINGS(M, ALIAS) \ - M(Bool, optimize_for_bulk_insert, true, "Table is optimized for bulk insertions (insert pipeline will create SST files and import to rocksdb database instead of writing to memtables)", 0) \ - M(UInt64, bulk_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "Size of block for bulk insert, if it's smaller than query setting min_insert_block_size_rows then it will be overridden by min_insert_block_size_rows", 0) \ +#define LIST_OF_ROCKSDB_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, optimize_for_bulk_insert, true, "Table is optimized for bulk insertions (insert pipeline will create SST files and import to rocksdb database instead of writing to memtables)", 0) \ + DECLARE(UInt64, bulk_insert_block_size, 
DEFAULT_INSERT_BLOCK_SIZE, "Size of block for bulk insert, if it's smaller than query setting min_insert_block_size_rows then it will be overridden by min_insert_block_size_rows", 0) \ DECLARE_SETTINGS_TRAITS(RocksDBSettingsTraits, LIST_OF_ROCKSDB_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(RocksDBSettingsTraits, LIST_OF_ROCKSDB_SETTINGS) diff --git a/src/Storages/SetSettings.cpp b/src/Storages/SetSettings.cpp index 4e6dd6a0519..525fbfa570f 100644 --- a/src/Storages/SetSettings.cpp +++ b/src/Storages/SetSettings.cpp @@ -1,9 +1,11 @@ -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -14,7 +16,46 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -IMPLEMENT_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS) +#define SET_RELATED_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageSet", 0) \ + DECLARE(String, disk, "default", "Name of the disk used to persist set data", 0) + +#define LIST_OF_SET_SETTINGS(M, ALIAS) \ + SET_RELATED_SETTINGS(M, ALIAS) \ + LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(SetSettingsTraits, LIST_OF_SET_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(SetSettingsTraits, LIST_OF_SET_SETTINGS) + + +struct SetSettingsImpl : public BaseSettings +{ +}; + +#define INITIALIZE_SETTING_EXTERN(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) SetSettings##TYPE NAME = &SetSettingsImpl ::NAME; + +namespace SetSetting +{ +LIST_OF_SET_SETTINGS(INITIALIZE_SETTING_EXTERN, SKIP_ALIAS) +} + +#undef INITIALIZE_SETTING_EXTERN + +SetSettings::SetSettings() : impl(std::make_unique()) +{ +} + +SetSettings::SetSettings(const SetSettings & settings) : impl(std::make_unique(*settings.impl)) +{ +} + +SetSettings::SetSettings(SetSettings && settings) noexcept : impl(std::make_unique(std::move(*settings.impl))) +{ +} + +SetSettings::~SetSettings() = default; + 
+SET_SETTINGS_SUPPORTED_TYPES(SetSettings, IMPLEMENT_SETTING_SUBSCRIPT_OPERATOR) void SetSettings::loadFromQuery(ASTStorage & storage_def) { @@ -22,7 +63,7 @@ void SetSettings::loadFromQuery(ASTStorage & storage_def) { try { - applyChanges(storage_def.settings->changes); + impl->applyChanges(storage_def.settings->changes); } catch (Exception & e) { diff --git a/src/Storages/SetSettings.h b/src/Storages/SetSettings.h index bd14859ff1e..a9729021692 100644 --- a/src/Storages/SetSettings.h +++ b/src/Storages/SetSettings.h @@ -1,31 +1,59 @@ #pragma once -#include -#include +#include #include +#include namespace DB { class ASTStorage; +struct SetSettingsImpl; +/// List of available types supported in SetSettings object +#define SET_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ + M(CLASS_NAME, ArrowCompression) \ + M(CLASS_NAME, Bool) \ + M(CLASS_NAME, CapnProtoEnumComparingMode) \ + M(CLASS_NAME, Char) \ + M(CLASS_NAME, DateTimeInputFormat) \ + M(CLASS_NAME, DateTimeOutputFormat) \ + M(CLASS_NAME, DateTimeOverflowBehavior) \ + M(CLASS_NAME, Double) \ + M(CLASS_NAME, EscapingRule) \ + M(CLASS_NAME, Float) \ + M(CLASS_NAME, IdentifierQuotingRule) \ + M(CLASS_NAME, IdentifierQuotingStyle) \ + M(CLASS_NAME, Int64) \ + M(CLASS_NAME, IntervalOutputFormat) \ + M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, ORCCompression) \ + M(CLASS_NAME, ParquetCompression) \ + M(CLASS_NAME, ParquetVersion) \ + M(CLASS_NAME, SchemaInferenceMode) \ + M(CLASS_NAME, String) \ + M(CLASS_NAME, UInt64) \ + M(CLASS_NAME, UInt64Auto) \ + M(CLASS_NAME, URI) -#define SET_RELATED_SETTINGS(M, ALIAS) \ - M(Bool, persistent, true, "Disable setting to avoid the overhead of writing to disk for StorageSet", 0) \ - M(String, disk, "default", "Name of the disk used to persist set data", 0) - -#define LIST_OF_SET_SETTINGS(M, ALIAS) \ - SET_RELATED_SETTINGS(M, ALIAS) \ - LIST_OF_ALL_FORMAT_SETTINGS(M, ALIAS) - -DECLARE_SETTINGS_TRAITS(setSettingsTraits, LIST_OF_SET_SETTINGS) 
+SET_SETTINGS_SUPPORTED_TYPES(SetSettings, DECLARE_SETTING_TRAIT) /** Settings for the Set engine. * Could be loaded from a CREATE TABLE query (SETTINGS clause). */ -struct SetSettings : public BaseSettings +struct SetSettings { + SetSettings(); + SetSettings(const SetSettings & settings); + SetSettings(SetSettings && settings) noexcept; + ~SetSettings(); + + SET_SETTINGS_SUPPORTED_TYPES(SetSettings, DECLARE_SETTING_SUBSCRIPT_OPERATOR) + void loadFromQuery(ASTStorage & storage_def); + +private: + std::unique_ptr impl; }; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b961b856672..4f5a95ab508 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -99,6 +100,8 @@ #include #include +#include + #include #include #include @@ -160,6 +163,18 @@ namespace Setting extern const SettingsUInt64 parallel_distributed_insert_select; } +namespace DistributedSetting +{ + extern const DistributedSettingsUInt64 background_insert_batch; + extern const DistributedSettingsMilliseconds background_insert_max_sleep_time_ms; + extern const DistributedSettingsMilliseconds background_insert_sleep_time_ms; + extern const DistributedSettingsUInt64 background_insert_split_batch_on_failure; + extern const DistributedSettingsUInt64 bytes_to_delay_insert; + extern const DistributedSettingsUInt64 bytes_to_throw_insert; + extern const DistributedSettingsBool flush_on_detach; + extern const DistributedSettingsUInt64 max_delay_to_insert; +} + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -353,11 +368,11 @@ StorageDistributed::StorageDistributed( , cluster_name(getContext()->getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) , relative_data_path(relative_data_path_) - , distributed_settings(distributed_settings_) + , distributed_settings(std::make_unique(distributed_settings_)) , rng(randomSeed()) , 
is_remote_function(is_remote_function_) { - if (!distributed_settings.flush_on_detach && distributed_settings.background_insert_batch) + if (!(*distributed_settings)[DistributedSetting::flush_on_detach] && (*distributed_settings)[DistributedSetting::background_insert_batch]) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Settings flush_on_detach=0 and background_insert_batch=1 are incompatible"); StorageInMemoryMetadata storage_metadata; @@ -893,7 +908,7 @@ void StorageDistributed::read( modified_query_info, sharding_key_expr, sharding_key_column_name, - distributed_settings, + *distributed_settings, shard_filter_generator, is_remote_function); @@ -1693,7 +1708,7 @@ void StorageDistributed::flushAndPrepareForShutdown() { try { - flushClusterNodesAllDataImpl(getContext(), /* settings_changes= */ {}, getDistributedSettingsRef().flush_on_detach); + flushClusterNodesAllDataImpl(getContext(), /* settings_changes= */ {}, (*distributed_settings)[DistributedSetting::flush_on_detach]); } catch (...) 
{ @@ -1805,32 +1820,32 @@ void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) void StorageDistributed::delayInsertOrThrowIfNeeded() const { - if (!distributed_settings.bytes_to_throw_insert && - !distributed_settings.bytes_to_delay_insert) + if (!(*distributed_settings)[DistributedSetting::bytes_to_throw_insert] && + !(*distributed_settings)[DistributedSetting::bytes_to_delay_insert]) return; UInt64 total_bytes = *totalBytes(getContext()->getSettingsRef()); - if (distributed_settings.bytes_to_throw_insert && total_bytes > distributed_settings.bytes_to_throw_insert) + if ((*distributed_settings)[DistributedSetting::bytes_to_throw_insert] && total_bytes > (*distributed_settings)[DistributedSetting::bytes_to_throw_insert]) { ProfileEvents::increment(ProfileEvents::DistributedRejectedInserts); throw Exception(ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES, "Too many bytes pending for async INSERT: {} (bytes_to_throw_insert={})", formatReadableSizeWithBinarySuffix(total_bytes), - formatReadableSizeWithBinarySuffix(distributed_settings.bytes_to_throw_insert)); + formatReadableSizeWithBinarySuffix((*distributed_settings)[DistributedSetting::bytes_to_throw_insert])); } - if (distributed_settings.bytes_to_delay_insert && total_bytes > distributed_settings.bytes_to_delay_insert) + if ((*distributed_settings)[DistributedSetting::bytes_to_delay_insert] && total_bytes > (*distributed_settings)[DistributedSetting::bytes_to_delay_insert]) { /// Step is 5% of the delay and minimal one second. /// NOTE: max_delay_to_insert is in seconds, and step is in ms. 
- const size_t step_ms = static_cast(std::min(1., static_cast(distributed_settings.max_delay_to_insert) * 1'000 * 0.05)); + const size_t step_ms = static_cast(std::min(1., static_cast((*distributed_settings)[DistributedSetting::max_delay_to_insert]) * 1'000 * 0.05)); UInt64 delayed_ms = 0; do { delayed_ms += step_ms; std::this_thread::sleep_for(std::chrono::milliseconds(step_ms)); - } while (*totalBytes(getContext()->getSettingsRef()) > distributed_settings.bytes_to_delay_insert && delayed_ms < distributed_settings.max_delay_to_insert*1000); + } while (*totalBytes(getContext()->getSettingsRef()) > (*distributed_settings)[DistributedSetting::bytes_to_delay_insert] && delayed_ms < (*distributed_settings)[DistributedSetting::max_delay_to_insert]*1000); ProfileEvents::increment(ProfileEvents::DistributedDelayedInserts); ProfileEvents::increment(ProfileEvents::DistributedDelayedInsertsMilliseconds, delayed_ms); @@ -1841,13 +1856,13 @@ void StorageDistributed::delayInsertOrThrowIfNeeded() const formatReadableSizeWithBinarySuffix(new_total_bytes), delayed_ms); - if (new_total_bytes > distributed_settings.bytes_to_delay_insert) + if (new_total_bytes > (*distributed_settings)[DistributedSetting::bytes_to_delay_insert]) { ProfileEvents::increment(ProfileEvents::DistributedRejectedInserts); throw Exception(ErrorCodes::DISTRIBUTED_TOO_MANY_PENDING_BYTES, "Too many bytes pending for async INSERT: {} (bytes_to_delay_insert={})", formatReadableSizeWithBinarySuffix(new_total_bytes), - formatReadableSizeWithBinarySuffix(distributed_settings.bytes_to_delay_insert)); + formatReadableSizeWithBinarySuffix((*distributed_settings)[DistributedSetting::bytes_to_delay_insert])); } } } @@ -1922,27 +1937,27 @@ void registerStorageDistributed(StorageFactory & factory) distributed_settings.loadFromQuery(*args.storage_def); } - if (distributed_settings.max_delay_to_insert < 1) + if (distributed_settings[DistributedSetting::max_delay_to_insert] < 1) throw 
Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "max_delay_to_insert cannot be less then 1"); - if (distributed_settings.bytes_to_throw_insert && distributed_settings.bytes_to_delay_insert && - distributed_settings.bytes_to_throw_insert <= distributed_settings.bytes_to_delay_insert) + if (distributed_settings[DistributedSetting::bytes_to_throw_insert] && distributed_settings[DistributedSetting::bytes_to_delay_insert] && + distributed_settings[DistributedSetting::bytes_to_throw_insert] <= distributed_settings[DistributedSetting::bytes_to_delay_insert]) { throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "bytes_to_throw_insert cannot be less or equal to bytes_to_delay_insert (since it is handled first)"); } /// Set default values from the distributed_background_insert_* global context settings. - if (!distributed_settings.background_insert_batch.changed) - distributed_settings.background_insert_batch = context->getSettingsRef()[Setting::distributed_background_insert_batch]; - if (!distributed_settings.background_insert_split_batch_on_failure.changed) - distributed_settings.background_insert_split_batch_on_failure + if (!distributed_settings[DistributedSetting::background_insert_batch].changed) + distributed_settings[DistributedSetting::background_insert_batch] = context->getSettingsRef()[Setting::distributed_background_insert_batch]; + if (!distributed_settings[DistributedSetting::background_insert_split_batch_on_failure].changed) + distributed_settings[DistributedSetting::background_insert_split_batch_on_failure] = context->getSettingsRef()[Setting::distributed_background_insert_split_batch_on_failure]; - if (!distributed_settings.background_insert_sleep_time_ms.changed) - distributed_settings.background_insert_sleep_time_ms = context->getSettingsRef()[Setting::distributed_background_insert_sleep_time_ms]; - if (!distributed_settings.background_insert_max_sleep_time_ms.changed) - distributed_settings.background_insert_max_sleep_time_ms + if 
(!distributed_settings[DistributedSetting::background_insert_sleep_time_ms].changed) + distributed_settings[DistributedSetting::background_insert_sleep_time_ms] = context->getSettingsRef()[Setting::distributed_background_insert_sleep_time_ms]; + if (!distributed_settings[DistributedSetting::background_insert_max_sleep_time_ms].changed) + distributed_settings[DistributedSetting::background_insert_max_sleep_time_ms] = context->getSettingsRef()[Setting::distributed_background_insert_max_sleep_time_ms]; return std::make_shared( diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 8a5585e9fd0..0fd6d8fdcc3 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ namespace DB { +struct DistributedSettings; struct Settings; class Context; @@ -217,7 +217,7 @@ private: size_t getRandomShardIndex(const Cluster::ShardsInfo & shards); std::string getClusterName() const { return cluster_name.empty() ? "" : cluster_name; } - const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; } + const DistributedSettings & getDistributedSettingsRef() const { return *distributed_settings; } void delayInsertOrThrowIfNeeded() const; @@ -259,7 +259,7 @@ private: /// Other volumes will be ignored. It's needed to allow using the same multi-volume policy both for Distributed and other engines. 
VolumePtr data_volume; - DistributedSettings distributed_settings; + std::unique_ptr distributed_settings; struct ClusterNodeData { diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index dd1b70364e2..013acb04f3e 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,18 @@ namespace Setting extern const SettingsSeconds max_execution_time; } +namespace ExecutableSetting +{ + extern const ExecutableSettingsBool send_chunk_header; + extern const ExecutableSettingsUInt64 pool_size; + extern const ExecutableSettingsUInt64 max_command_execution_time; + extern const ExecutableSettingsUInt64 command_termination_timeout; + extern const ExecutableSettingsUInt64 command_read_timeout; + extern const ExecutableSettingsUInt64 command_write_timeout; + extern const ExecutableSettingsExternalCommandStderrReaction stderr_reaction; + extern const ExecutableSettingsBool check_exit_code; +} + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -85,9 +98,9 @@ StorageExecutable::StorageExecutable( const ConstraintsDescription & constraints, const String & comment) : IStorage(table_id_) - , settings(settings_) + , settings(std::make_unique(settings_)) , input_queries(input_queries_) - , log(settings.is_executable_pool ? getLogger("StorageExecutablePool") : getLogger("StorageExecutable")) + , log(settings->is_executable_pool ? 
getLogger("StorageExecutablePool") : getLogger("StorageExecutable")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); @@ -98,23 +111,32 @@ StorageExecutable::StorageExecutable( ShellCommandSourceCoordinator::Configuration configuration { .format = format, - .command_termination_timeout_seconds = settings.command_termination_timeout, - .command_read_timeout_milliseconds = settings.command_read_timeout, - .command_write_timeout_milliseconds = settings.command_write_timeout, - .stderr_reaction = settings.stderr_reaction, - .check_exit_code = settings.check_exit_code, + .command_termination_timeout_seconds = (*settings)[ExecutableSetting::command_termination_timeout], + .command_read_timeout_milliseconds = (*settings)[ExecutableSetting::command_read_timeout], + .command_write_timeout_milliseconds = (*settings)[ExecutableSetting::command_write_timeout], + .stderr_reaction = (*settings)[ExecutableSetting::stderr_reaction], + .check_exit_code = (*settings)[ExecutableSetting::check_exit_code], - .pool_size = settings.pool_size, - .max_command_execution_time_seconds = settings.max_command_execution_time, + .pool_size = (*settings)[ExecutableSetting::pool_size], + .max_command_execution_time_seconds = (*settings)[ExecutableSetting::max_command_execution_time], - .is_executable_pool = settings.is_executable_pool, - .send_chunk_header = settings.send_chunk_header, + .is_executable_pool = settings->is_executable_pool, + .send_chunk_header = (*settings)[ExecutableSetting::send_chunk_header], .execute_direct = true }; coordinator = std::make_unique(std::move(configuration)); } +StorageExecutable::~StorageExecutable() = default; + +String StorageExecutable::getName() const +{ + if (settings->is_executable_pool) + return "ExecutablePool"; + return "Executable"; +} + void StorageExecutable::read( QueryPlan & query_plan, const Names & column_names, @@ -125,7 +147,7 @@ void StorageExecutable::read( size_t max_block_size, size_t /*threads*/) { - auto & 
script_name = settings.script_name; + auto & script_name = settings->script_name; auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; @@ -163,7 +185,7 @@ void StorageExecutable::read( } /// For executable pool we read data from input streams and convert it to single blocks streams. - if (settings.is_executable_pool) + if (settings->is_executable_pool) transformToSingleBlockSources(inputs); auto sample_block = storage_snapshot->metadata->getSampleBlock(); @@ -171,13 +193,13 @@ void StorageExecutable::read( ShellCommandSourceConfiguration configuration; configuration.max_block_size = max_block_size; - if (settings.is_executable_pool) + if (settings->is_executable_pool) { configuration.read_fixed_number_of_rows = true; configuration.read_number_of_rows_from_process_output = true; } - auto pipe = coordinator->createPipe(script_path, settings.script_arguments, std::move(inputs), std::move(sample_block), context, configuration); + auto pipe = coordinator->createPipe(script_path, settings->script_arguments, std::move(inputs), std::move(sample_block), context, configuration); IStorage::readFromPipe(query_plan, std::move(pipe), column_names, storage_snapshot, query_info, context, getName()); query_plan.addResources(std::move(resources)); } @@ -237,7 +259,7 @@ void registerStorageExecutable(StorageFactory & factory) if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; - settings.max_command_execution_time = max_command_execution_time; + settings[ExecutableSetting::max_command_execution_time] = max_command_execution_time; } if (args.storage_def->settings) diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 90a7d0f950d..66c24eb06d8 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -2,11 +2,11 @@ #include #include -#include namespace DB { +struct 
ExecutableSettings; /** * This class represents table engine for external executable files. @@ -25,12 +25,9 @@ public: const ConstraintsDescription & constraints, const String & comment); - String getName() const override - { - if (settings.is_executable_pool) - return "ExecutablePool"; - return "Executable"; - } + ~StorageExecutable() override; + + String getName() const override; void read( QueryPlan & query_plan, @@ -43,7 +40,7 @@ public: size_t threads) override; private: - ExecutableSettings settings; + std::unique_ptr settings; std::vector input_queries; LoggerPtr log; std::unique_ptr coordinator; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 84eed6dbbe6..71447889d86 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -44,6 +44,15 @@ namespace Setting extern const SettingsUInt64 max_compress_block_size; } +namespace MemorySetting +{ + extern const MemorySettingsBool compress; + extern const MemorySettingsUInt64 max_bytes_to_keep; + extern const MemorySettingsUInt64 max_rows_to_keep; + extern const MemorySettingsUInt64 min_bytes_to_keep; + extern const MemorySettingsUInt64 min_rows_to_keep; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -78,7 +87,7 @@ public: convertDynamicColumnsToTuples(block, storage_snapshot); } - if (storage.getMemorySettingsRef().compress) + if (storage.getMemorySettingsRef()[MemorySetting::compress]) { Block compressed_block; for (const auto & elem : block) @@ -110,14 +119,14 @@ public: UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes; const auto & memory_settings = storage.getMemorySettingsRef(); while (!new_data->empty() - && ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep) - || (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep))) + && ((memory_settings[MemorySetting::max_bytes_to_keep] && new_total_bytes 
> memory_settings[MemorySetting::max_bytes_to_keep]) + || (memory_settings[MemorySetting::max_rows_to_keep] && new_total_rows > memory_settings[MemorySetting::max_rows_to_keep]))) { Block oldest_block = new_data->front(); UInt64 rows_to_remove = oldest_block.rows(); UInt64 bytes_to_remove = oldest_block.allocatedBytes(); - if (new_total_bytes - bytes_to_remove < memory_settings.min_bytes_to_keep - || new_total_rows - rows_to_remove < memory_settings.min_rows_to_keep) + if (new_total_bytes - bytes_to_remove < memory_settings[MemorySetting::min_bytes_to_keep] + || new_total_rows - rows_to_remove < memory_settings[MemorySetting::min_rows_to_keep]) { break; // stop - removing next block will put us under min_bytes / min_rows threshold } @@ -151,16 +160,18 @@ StorageMemory::StorageMemory( const MemorySettings & memory_settings_) : IStorage(table_id_) , data(std::make_unique()) - , memory_settings(memory_settings_) + , memory_settings(std::make_unique(memory_settings_)) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(std::move(columns_description_)); storage_metadata.setConstraints(std::move(constraints_)); storage_metadata.setComment(comment); - storage_metadata.setSettingsChanges(memory_settings.getSettingsChangesQuery()); + storage_metadata.setSettingsChanges(memory_settings->getSettingsChangesQuery()); setInMemoryMetadata(storage_metadata); } +StorageMemory::~StorageMemory() = default; + StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const { auto snapshot_data = std::make_unique(); @@ -246,7 +257,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context Block block; while (executor.pull(block)) { - if (memory_settings.compress) + if ((*memory_settings)[MemorySetting::compress]) for (auto & elem : block) elem.column = elem.column->compress(); @@ -310,14 +321,14 @@ void StorageMemory::alter(const DB::AlterCommands & params, 
DB::ContextPtr conte if (params.isSettingsAlter()) { auto & settings_changes = new_metadata.settings_changes->as(); - auto changed_settings = memory_settings; + auto changed_settings = *memory_settings; changed_settings.applyChanges(settings_changes.changes); changed_settings.sanityCheck(); /// When modifying the values of max_bytes_to_keep and max_rows_to_keep to be smaller than the old values, /// the old data needs to be removed. - if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > changed_settings.max_bytes_to_keep - || !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > changed_settings.max_rows_to_keep) + if (!(*memory_settings)[MemorySetting::max_bytes_to_keep] || (*memory_settings)[MemorySetting::max_bytes_to_keep] > changed_settings[MemorySetting::max_bytes_to_keep] + || !(*memory_settings)[MemorySetting::max_rows_to_keep] || (*memory_settings)[MemorySetting::max_rows_to_keep] > changed_settings[MemorySetting::max_rows_to_keep]) { std::lock_guard lock(mutex); @@ -325,14 +336,14 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed); UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed); while (!new_data->empty() - && ((changed_settings.max_bytes_to_keep && new_total_bytes > changed_settings.max_bytes_to_keep) - || (changed_settings.max_rows_to_keep && new_total_rows > changed_settings.max_rows_to_keep))) + && ((changed_settings[MemorySetting::max_bytes_to_keep] && new_total_bytes > changed_settings[MemorySetting::max_bytes_to_keep]) + || (changed_settings[MemorySetting::max_rows_to_keep] && new_total_rows > changed_settings[MemorySetting::max_rows_to_keep]))) { Block oldest_block = new_data->front(); UInt64 rows_to_remove = oldest_block.rows(); UInt64 bytes_to_remove = oldest_block.allocatedBytes(); - if (new_total_bytes - bytes_to_remove < changed_settings.min_bytes_to_keep - || 
new_total_rows - rows_to_remove < changed_settings.min_rows_to_keep) + if (new_total_bytes - bytes_to_remove < changed_settings[MemorySetting::min_bytes_to_keep] + || new_total_rows - rows_to_remove < changed_settings[MemorySetting::min_rows_to_keep]) { break; // stop - removing next block will put us under min_bytes / min_rows threshold } @@ -347,7 +358,7 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte total_size_rows.store(new_total_rows, std::memory_order_relaxed); total_size_bytes.store(new_total_bytes, std::memory_order_relaxed); } - memory_settings = std::move(changed_settings); + *memory_settings = std::move(changed_settings); } DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); @@ -559,7 +570,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat while (auto block = block_in.read()) { - if (memory_settings.compress) + if ((*memory_settings)[MemorySetting::compress]) { Block compressed_block; for (const auto & elem : block) diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 57fccb98e06..7a9b201c500 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -15,6 +14,7 @@ namespace DB { class IBackup; using BackupPtr = std::shared_ptr; +struct MemorySettings; /** Implements storage in the RAM. * Suitable for temporary data. 
@@ -31,7 +31,9 @@ public: ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - const MemorySettings & memory_settings_ = MemorySettings()); + const MemorySettings & memory_settings_); + + ~StorageMemory() override; String getName() const override { return "Memory"; } @@ -47,7 +49,7 @@ public: StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; - const MemorySettings & getMemorySettingsRef() const { return memory_settings; } + const MemorySettings & getMemorySettingsRef() const { return *memory_settings; } void read( QueryPlan & query_plan, @@ -139,7 +141,7 @@ private: std::atomic total_size_bytes = 0; std::atomic total_size_rows = 0; - MemorySettings memory_settings; + std::unique_ptr memory_settings; friend class ReadFromMemoryStorageStep; }; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index cefdc40df22..6d1394ddb40 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL +#include #include #include #include @@ -36,6 +37,12 @@ namespace Setting extern const SettingsUInt64 mysql_max_rows_to_insert; } +namespace MySQLSetting +{ + extern const MySQLSettingsBool connection_auto_close; + extern const MySQLSettingsUInt64 connection_pool_size; +} + namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -61,7 +68,7 @@ StorageMySQL::StorageMySQL( , remote_table_name(remote_table_name_) , replace_query{replace_query_} , on_duplicate_clause{on_duplicate_clause_} - , mysql_settings(mysql_settings_) + , mysql_settings(std::make_unique(mysql_settings_)) , pool(std::make_shared(pool_)) , log(getLogger("StorageMySQL (" + table_id_.table_name + ")")) { @@ -132,7 +139,7 @@ Pipe StorageMySQL::read( StreamSettings mysql_input_stream_settings(context_->getSettingsRef(), - mysql_settings.connection_auto_close); + 
(*mysql_settings)[MySQLSetting::connection_auto_close]); return Pipe(std::make_shared(pool, query, sample_block, mysql_input_stream_settings)); } @@ -269,9 +276,9 @@ StorageMySQL::Configuration StorageMySQL::processNamedCollectionResult( StorageMySQL::Configuration configuration; ValidateKeysMultiset optional_arguments = {"replace_query", "on_duplicate_clause", "addresses_expr", "host", "hostname", "port"}; - auto mysql_settings = storage_settings.all(); - for (const auto & setting : mysql_settings) - optional_arguments.insert(setting.getName()); + auto mysql_settings_names = storage_settings.getAllRegisteredNames(); + for (const auto & name : mysql_settings_names) + optional_arguments.insert(name); ValidateKeysMultiset required_arguments = {"user", "username", "password", "database", "db"}; if (require_table) @@ -300,12 +307,7 @@ StorageMySQL::Configuration StorageMySQL::processNamedCollectionResult( configuration.replace_query = named_collection.getOrDefault("replace_query", false); configuration.on_duplicate_clause = named_collection.getOrDefault("on_duplicate_clause", ""); - for (const auto & setting : mysql_settings) - { - const auto & setting_name = setting.getName(); - if (named_collection.has(setting_name)) - storage_settings.set(setting_name, named_collection.get(setting_name)); - } + storage_settings.loadFromNamedCollection(named_collection); return configuration; } @@ -360,7 +362,7 @@ void registerStorageMySQL(StorageFactory & factory) if (args.storage_def->settings) mysql_settings.loadFromQuery(*args.storage_def); - if (!mysql_settings.connection_pool_size) + if (!mysql_settings[MySQLSetting::connection_pool_size]) throw Exception(ErrorCodes::BAD_ARGUMENTS, "connection_pool_size cannot be zero."); mysqlxx::PoolWithFailover pool = createMySQLPoolWithFailover(configuration, mysql_settings); diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index daabd66a530..ca08253bbe3 100644 --- a/src/Storages/StorageMySQL.h +++ 
b/src/Storages/StorageMySQL.h @@ -5,7 +5,6 @@ #if USE_MYSQL #include -#include #include namespace Poco @@ -16,6 +15,7 @@ class Logger; namespace DB { +struct MySQLSettings; class NamedCollection; /** Implements storage in the MySQL database. @@ -88,7 +88,7 @@ private: bool replace_query; std::string on_duplicate_clause; - MySQLSettings mysql_settings; + std::unique_ptr mysql_settings; mysqlxx::PoolWithFailoverPtr pool; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 2562378e10b..61233782aef 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -24,18 +24,18 @@ namespace fs = std::filesystem; namespace DB { -namespace ErrorCodes +namespace SetSetting { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const SetSettingsString disk; + extern const SetSettingsBool persistent; } - namespace ErrorCodes { extern const int INCORRECT_FILE_NAME; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } - class SetOrJoinSink : public SinkToStorage, WithContext { public: @@ -322,9 +322,9 @@ void registerStorageSet(StorageFactory & factory) if (has_settings) set_settings.loadFromQuery(*args.storage_def); - DiskPtr disk = args.getContext()->getDisk(set_settings.disk); + DiskPtr disk = args.getContext()->getDisk(set_settings[SetSetting::disk]); return std::make_shared( - disk, args.relative_data_path, args.table_id, args.columns, args.constraints, args.comment, set_settings.persistent); + disk, args.relative_data_path, args.table_id, args.columns, args.constraints, args.comment, set_settings[SetSetting::persistent]); }, StorageFactory::StorageFeatures{ .supports_settings = true, }); } diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 9e5adbfe825..f4af8071ddd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -62,7 +62,6 @@ const char * auto_config_build[] "USE_ARROW", 
"@USE_ARROW@", "USE_ORC", "@USE_ORC@", "USE_MSGPACK", "@USE_MSGPACK@", - "USE_QPL", "@USE_QPL@", "USE_QATLIB", "@USE_QATLIB@", "GIT_HASH", "@GIT_HASH@", "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", diff --git a/src/Storages/System/StorageSystemObjectStorageQueueSettings.cpp b/src/Storages/System/StorageSystemObjectStorageQueueSettings.cpp new file mode 100644 index 00000000000..a6cf0ab255c --- /dev/null +++ b/src/Storages/System/StorageSystemObjectStorageQueueSettings.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +template +ColumnsDescription StorageSystemObjectStorageQueueSettings::getColumnsDescription() +{ + return ColumnsDescription + { + {"database", std::make_shared(), "Database of the table with S3Queue Engine."}, + {"table", std::make_shared(), "Name of the table with S3Queue Engine."}, + {"name", std::make_shared(), "Setting name."}, + {"value", std::make_shared(), "Setting value."}, + {"type", std::make_shared(), "Setting type (implementation specific string value)."}, + {"changed", std::make_shared(), "1 if the setting was explicitly defined in the config or explicitly changed."}, + {"description", std::make_shared(), "Setting description."}, + {"alterable", std::make_shared(), + "Shows whether the current user can change the setting via ALTER TABLE MODIFY SETTING: " + "0 — Current user can change the setting, " + "1 — Current user can't change the setting." 
+ }, + }; +} + +template +void StorageSystemObjectStorageQueueSettings::fillData( + MutableColumns & res_columns, + ContextPtr context, + const ActionsDAG::Node *, + std::vector) const +{ + auto add_table = [&]( + const DatabaseTablesIteratorPtr & it, StorageObjectStorageQueue & storage) + { + if (storage.getType() != type) + return; + + auto constraints_and_current_profiles = context->getSettingsConstraintsAndCurrentProfiles(); + const auto & constraints = constraints_and_current_profiles->constraints; + MutableColumnsAndConstraints params(res_columns, constraints); + storage.getSettings().dumpToSystemEngineSettingsColumns(params, it->name(), it->databaseName(), storage); + }; + + const auto access = context->getAccess(); + const bool show_tables_granted = access->isGranted(AccessType::SHOW_TABLES); + if (show_tables_granted) + { + auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & db : databases) + { + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) + { + StoragePtr storage = iterator->table(); + if (auto * queue_table = dynamic_cast(storage.get())) + { + add_table(iterator, *queue_table); + } + } + } + + } +} + +template class StorageSystemObjectStorageQueueSettings; +template class StorageSystemObjectStorageQueueSettings; +} diff --git a/src/Storages/System/StorageSystemObjectStorageQueueSettings.h b/src/Storages/System/StorageSystemObjectStorageQueueSettings.h new file mode 100644 index 00000000000..ff755913426 --- /dev/null +++ b/src/Storages/System/StorageSystemObjectStorageQueueSettings.h @@ -0,0 +1,30 @@ +#pragma once +#include +#include + +namespace DB +{ + +class Context; + +template +class StorageSystemObjectStorageQueueSettings final : public IStorageSystemOneBlock +{ +public: + static constexpr auto name = type == ObjectStorageType::S3 ? 
"SystemS3QueueSettings" : "SystemAzureQueueSettings"; + + std::string getName() const override { return name; } + + static ColumnsDescription getColumnsDescription(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData( + MutableColumns & res_columns, + ContextPtr context, + const ActionsDAG::Node *, + std::vector) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 816ba5095b1..7c6dac7a608 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -227,6 +228,8 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "user_processes", "This system table can be used to get overview of memory usage and ProfileEvents of users."); attachNoDescription(context, system_database, "jemalloc_bins", "Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas. These statistics might not be absolutely accurate because of thread local caching in jemalloc."); attachNoDescription(context, system_database, "s3queue", "Contains in-memory state of S3Queue metadata and currently processed rows per file."); + attach>(context, system_database, "s3_queue_settings", "Contains a list of settings of S3Queue tables."); + attach>(context, system_database, "azure_queue_settings", "Contains a list of settings of AzureQueue tables."); attach(context, system_database, "dashboards", "Contains queries used by /dashboard page accessible though HTTP interface. This table can be useful for monitoring and troubleshooting. 
The table contains a row for every chart in a dashboard."); attach(context, system_database, "view_refreshes", "Lists all Refreshable Materialized Views of current server."); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 6daad8488ff..4845984cc88 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -30,7 +30,6 @@ namespace Setting extern const SettingsBool allow_suspicious_codecs; extern const SettingsBool allow_suspicious_ttl_expressions; extern const SettingsBool enable_zstd_qat_codec; - extern const SettingsBool enable_deflate_qpl_codec; } namespace ErrorCodes @@ -349,7 +348,7 @@ TTLDescription TTLDescription::getTTLFromAST( { result.recompression_codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( - ttl_element->recompression_codec, {}, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], context->getSettingsRef()[Setting::enable_deflate_qpl_codec], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); + ttl_element->recompression_codec, {}, !context->getSettingsRef()[Setting::allow_suspicious_codecs], context->getSettingsRef()[Setting::allow_experimental_codecs], context->getSettingsRef()[Setting::enable_zstd_qat_codec]); } } diff --git a/src/Storages/TimeSeries/TimeSeriesSettings.cpp b/src/Storages/TimeSeries/TimeSeriesSettings.cpp index c8b9715250d..831199ffe5d 100644 --- a/src/Storages/TimeSeries/TimeSeriesSettings.cpp +++ b/src/Storages/TimeSeries/TimeSeriesSettings.cpp @@ -13,12 +13,13 @@ namespace ErrorCodes extern const int UNKNOWN_SETTING; } -#define LIST_OF_TIME_SERIES_SETTINGS(M, ALIAS) \ - M(Map, tags_to_columns, Map{}, "Map specifying which tags should be put to separate columns of the 'tags' table. 
Syntax: {'tag1': 'column1', 'tag2' : column2, ...}", 0) \ - M(Bool, use_all_tags_column_to_generate_id, true, "When generating an expression to calculate an identifier of a time series, this flag enables using the 'all_tags' column in that calculation. The 'all_tags' is a virtual column containing all tags except the metric name", 0) \ - M(Bool, store_min_time_and_max_time, true, "If set to true then the table will store 'min_time' and 'max_time' for each time series", 0) \ - M(Bool, aggregate_min_time_and_max_time, true, "When creating an inner target 'tags' table, this flag enables using 'SimpleAggregateFunction(min, Nullable(DateTime64(3)))' instead of just 'Nullable(DateTime64(3))' as the type of the 'min_time' column, and the same for the 'max_time' column", 0) \ - M(Bool, filter_by_min_time_and_max_time, true, "If set to true then the table will use the 'min_time' and 'max_time' columns for filtering time series", 0) \ + +#define LIST_OF_TIME_SERIES_SETTINGS(DECLARE, ALIAS) \ + DECLARE(Map, tags_to_columns, Map{}, "Map specifying which tags should be put to separate columns of the 'tags' table. Syntax: {'tag1': 'column1', 'tag2' : column2, ...}", 0) \ + DECLARE(Bool, use_all_tags_column_to_generate_id, true, "When generating an expression to calculate an identifier of a time series, this flag enables using the 'all_tags' column in that calculation. 
The 'all_tags' is a virtual column containing all tags except the metric name", 0) \ + DECLARE(Bool, store_min_time_and_max_time, true, "If set to true then the table will store 'min_time' and 'max_time' for each time series", 0) \ + DECLARE(Bool, aggregate_min_time_and_max_time, true, "When creating an inner target 'tags' table, this flag enables using 'SimpleAggregateFunction(min, Nullable(DateTime64(3)))' instead of just 'Nullable(DateTime64(3))' as the type of the 'min_time' column, and the same for the 'max_time' column", 0) \ + DECLARE(Bool, filter_by_min_time_and_max_time, true, "If set to true then the table will use the 'min_time' and 'max_time' columns for filtering time series", 0) \ DECLARE_SETTINGS_TRAITS(TimeSeriesSettingsTraits, LIST_OF_TIME_SERIES_SETTINGS) IMPLEMENT_SETTINGS_TRAITS(TimeSeriesSettingsTraits, LIST_OF_TIME_SERIES_SETTINGS) diff --git a/src/Storages/examples/merge_selector.cpp b/src/Storages/examples/merge_selector.cpp index b029a51a074..ae545f6cbd5 100644 --- a/src/Storages/examples/merge_selector.cpp +++ b/src/Storages/examples/merge_selector.cpp @@ -2,6 +2,7 @@ #include #include #include +#include /** This program tests merge-selecting algorithm. 
@@ -17,15 +18,12 @@ int main(int, char **) IMergeSelector::PartsRanges partitions(1); IMergeSelector::PartsRange & parts = partitions.back(); - SimpleMergeSelector::Settings settings; +/* SimpleMergeSelector::Settings settings; // settings.base = 2; -// settings.max_parts_to_merge_at_once = 10; - SimpleMergeSelector selector(settings); + settings.max_parts_to_merge_at_once = 10; + SimpleMergeSelector selector(settings);*/ -/* LevelMergeSelector::Settings settings; - settings.min_parts_to_merge = 8; - settings.max_parts_to_merge = 16; - LevelMergeSelector selector(settings);*/ + TrivialMergeSelector selector; ReadBufferFromFileDescriptor in(STDIN_FILENO); @@ -57,10 +55,11 @@ int main(int, char **) if (selected_parts.empty()) { - std::cout << '.'; - for (auto & part : parts) + //std::cout << '.'; + /*for (auto & part : parts) ++part.age; - continue; + continue;*/ + break; } std::cout << '\n'; diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 5a63c118e2d..8d7fef57776 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -105,7 +106,7 @@ private: context, table_name, std::make_shared( - StorageID(db_name, table_name), ColumnsDescription{tab.columns}, ConstraintsDescription{}, String{})); + StorageID(db_name, table_name), ColumnsDescription{tab.columns}, ConstraintsDescription{}, String{}, MemorySettings{})); } DatabaseCatalog::instance().attachDatabase(database->getDatabaseName(), database); diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 12371f6ff82..d378db2a337 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include 
#include #include diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 833a12c9b68..6f6e4bd7e67 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -29,6 +29,12 @@ namespace Setting extern const SettingsUInt64 external_storage_rw_timeout_sec; } +namespace MySQLSetting +{ + extern const MySQLSettingsUInt64 connect_timeout; + extern const MySQLSettingsUInt64 read_write_timeout; +} + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -72,8 +78,8 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr MySQLSettings mysql_settings; const auto & settings = context->getSettingsRef(); - mysql_settings.connect_timeout = settings[Setting::external_storage_connect_timeout_sec]; - mysql_settings.read_write_timeout = settings[Setting::external_storage_rw_timeout_sec]; + mysql_settings[MySQLSetting::connect_timeout] = settings[Setting::external_storage_connect_timeout_sec]; + mysql_settings[MySQLSetting::read_write_timeout] = settings[Setting::external_storage_rw_timeout_sec]; for (auto * it = args.begin(); it != args.end(); ++it) { diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index ed00fd2cef3..5d6d3e9ce47 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include #include "registerTableFunctions.h" diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c67f8d290b3..94a013d21dd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -135,9 +135,6 @@ endif() if (TARGET ch_contrib::vectorscan) set(USE_VECTORSCAN 1) endif() -if (TARGET ch_contrib::qpl) - set(USE_QPL 1) -endif() if (TARGET ch_contrib::qatlib) set(USE_QATLIB 1) endif() diff --git a/tests/ci/ci_settings.py 
b/tests/ci/ci_settings.py index bfff9abceb6..3f80d8b07f4 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -168,9 +168,12 @@ class CiSettings: to_deny = False if self.include_keywords: - # do not exclude builds - if job == CI.JobNames.STYLE_CHECK or CI.is_build_job(job): - # never exclude Style Check by include keywords + # never exclude builds, build report, style check + if ( + job == CI.JobNames.STYLE_CHECK + or CI.is_build_job(job) + or job == CI.JobNames.BUILD_CHECK + ): return True for keyword in self.include_keywords: if keyword in CI.Utils.normalize_string(job): diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 29298908d43..8ed2f972183 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -410,7 +410,7 @@ class PRInfo: (ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs) or "docker/docs" in f or "Settings.cpp" in f - or "FormatFactorySettingsDeclaration.h" in f + or "FormatFactorySettings.h" in f ): return True return False diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 3b3a6bcadb5..6b8a1d86e05 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -19,7 +19,6 @@ def get_options(i: int, upgrade_check: bool) -> str: if i % 3 == 2 and not upgrade_check: options.append(f'''--db-engine="Replicated('/test/db/test_{i}', 's1', 'r1')"''') - client_options.append("enable_deflate_qpl_codec=1") client_options.append("enable_zstd_qat_codec=1") # If database name is not specified, new database is created for each functional test. 
diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index e2dc71de469..536e18758f8 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -211,6 +211,7 @@ class TestCIOptions(unittest.TestCase): "Integration tests (release)", "Integration tests (asan)", "Integration tests flaky check (asan)", + "Builds", ], ) @@ -338,5 +339,6 @@ class TestCIOptions(unittest.TestCase): "package_msan", "package_ubsan", "binary_release", + "Builds", ], ) diff --git a/tests/config/install.sh b/tests/config/install.sh index fda74bd7a8d..be47298f6a4 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -115,6 +115,7 @@ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/ ln -sf $SRC_PATH/regions_hierarchy.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/regions_names_en.txt $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/regions_names_es.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/ext-en.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/ext-ru.txt $DEST_SERVER_PATH/config.d/ diff --git a/tests/config/regions_names_es.txt b/tests/config/regions_names_es.txt new file mode 100644 index 00000000000..b7ddc755bf9 --- /dev/null +++ b/tests/config/regions_names_es.txt @@ -0,0 +1,12 @@ +1 Mundo +2 Estados Unidos +3 Colorado +4 Boulder County +5 Boulder +6 China +7 Sichuan +8 Chengdú +9 América +10 América del Norte +11 Eurasia +12 Asia diff --git a/tests/integration/compose/docker_compose_minio.yml b/tests/integration/compose/docker_compose_minio.yml index 44a07e97843..7fbe3796a0c 100644 --- a/tests/integration/compose/docker_compose_minio.yml +++ b/tests/integration/compose/docker_compose_minio.yml @@ -39,6 +39,10 @@ services: depends_on: - proxy1 - proxy2 + volumes: + - type: ${RESOLVER_LOGS_FS:-tmpfs} + source: ${RESOLVER_LOGS:-} + target: /var/log/resolver volumes: data1-1: diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 
dc50a29362a..3c92df51ac4 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -563,6 +563,7 @@ class ClickHouseCluster: self.minio_redirect_ip = None self.minio_redirect_port = 8080 self.minio_docker_id = self.get_instance_docker_id(self.minio_host) + self.resolver_logs_dir = os.path.join(self.instances_dir, "resolver") self.spark_session = None @@ -1445,6 +1446,8 @@ class ClickHouseCluster: env_variables["MINIO_DATA_DIR"] = self.minio_data_dir env_variables["MINIO_PORT"] = str(self.minio_port) env_variables["SSL_CERT_FILE"] = p.join(self.base_dir, cert_d, "public.crt") + env_variables["RESOLVER_LOGS"] = self.resolver_logs_dir + env_variables["RESOLVER_LOGS_FS"] = "bind" self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_minio.yml")] @@ -2997,6 +3000,7 @@ class ClickHouseCluster: os.mkdir(self.minio_dir) if self.minio_certs_dir is None: os.mkdir(os.path.join(self.minio_dir, "certs")) + os.mkdir(os.path.join(self.minio_dir, "certs", "CAs")) else: shutil.copytree( os.path.join(self.base_dir, self.minio_certs_dir), @@ -3005,6 +3009,9 @@ class ClickHouseCluster: os.mkdir(self.minio_data_dir) os.chmod(self.minio_data_dir, stat.S_IRWXU | stat.S_IRWXO) + os.makedirs(self.resolver_logs_dir) + os.chmod(self.resolver_logs_dir, stat.S_IRWXU | stat.S_IRWXO) + minio_start_cmd = self.base_minio_cmd + common_opts logging.info( diff --git a/tests/integration/helpers/mock_servers.py b/tests/integration/helpers/mock_servers.py index a7674477787..a0febf011ab 100644 --- a/tests/integration/helpers/mock_servers.py +++ b/tests/integration/helpers/mock_servers.py @@ -31,9 +31,23 @@ def start_mock_servers(cluster, script_dir, mocks, timeout=100): server_name, ) + logs_dir = ( + "/var/log/resolver" + if container == "resolver" + else "/var/log/clickhouse-server" + ) + log_file = os.path.join(logs_dir, os.path.splitext(server_name)[0] + ".log") + err_log_file = os.path.join( + logs_dir, os.path.splitext(server_name)[0] 
+ ".err.log" + ) + cluster.exec_in_container( container_id, - ["python3", server_name, str(port)], + [ + "bash", + "-c", + f"python3 {server_name} {port} >{log_file} 2>{err_log_file}", + ], detach=True, ) diff --git a/tests/integration/test_keeper_broken_logs/test.py b/tests/integration/test_keeper_broken_logs/test.py index f75e2ae4f20..be891f9b6c8 100644 --- a/tests/integration/test_keeper_broken_logs/test.py +++ b/tests/integration/test_keeper_broken_logs/test.py @@ -1,4 +1,5 @@ import time +from multiprocessing.dummy import Pool import pytest @@ -52,15 +53,34 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def start_clickhouse(node): + node.start_clickhouse() + + +def clean_start(): + nodes = [node1, node2, node3] + for node in nodes: + node.stop_clickhouse() + + p = Pool(3) + waiters = [] + for node in nodes: + node.exec_in_container(["rm", "-rf", "/var/lib/clickhouse/coordination/log"]) + node.exec_in_container( + ["rm", "-rf", "/var/lib/clickhouse/coordination/snapshots"] + ) + waiters.append(p.apply_async(start_clickhouse, (node,))) + + for waiter in waiters: + waiter.wait() + + def test_single_node_broken_log(started_cluster): + clean_start() try: wait_nodes() node1_conn = get_fake_zk("node1") - # Cleanup - if node1_conn.exists("/test_broken_log") != None: - node1_conn.delete("/test_broken_log") - node1_conn.create("/test_broken_log") for _ in range(10): node1_conn.create(f"/test_broken_log/node", b"somedata1", sequence=True) @@ -110,10 +130,12 @@ def test_single_node_broken_log(started_cluster): verify_nodes(node3_conn) assert node3_conn.get("/test_broken_log_final_node")[0] == b"somedata1" - assert ( + node1_logs = ( node1.exec_in_container(["ls", "/var/lib/clickhouse/coordination/log"]) - == "changelog_1_100000.bin\nchangelog_14_100013.bin\n" + .strip() + .split("\n") ) + assert len(node1_logs) == 2 and node1_logs[0] == "changelog_1_100000.bin" assert ( node2.exec_in_container(["ls", "/var/lib/clickhouse/coordination/log"]) == 
"changelog_1_100000.bin\n" diff --git a/tests/integration/test_merge_tree_s3/s3_mocks/no_delete_objects.py b/tests/integration/test_merge_tree_s3/s3_mocks/no_delete_objects.py index 68c1f43f13d..91735143be4 100644 --- a/tests/integration/test_merge_tree_s3/s3_mocks/no_delete_objects.py +++ b/tests/integration/test_merge_tree_s3/s3_mocks/no_delete_objects.py @@ -80,7 +80,6 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.send_header(k, v) self.end_headers() self.wfile.write(r.content) - self.wfile.close() class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): diff --git a/tests/integration/test_merge_tree_s3/s3_mocks/unstable_proxy.py b/tests/integration/test_merge_tree_s3/s3_mocks/unstable_proxy.py index 21986c0d692..0e00c4094e7 100644 --- a/tests/integration/test_merge_tree_s3/s3_mocks/unstable_proxy.py +++ b/tests/integration/test_merge_tree_s3/s3_mocks/unstable_proxy.py @@ -70,7 +70,6 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if random.random() < 0.25 and len(r.content) > 1024 * 1024: r.content = r.content[: len(r.content) // 2] self.wfile.write(r.content) - self.wfile.close() class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): diff --git a/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml b/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml deleted file mode 100644 index 2ad6a0f1eff..00000000000 --- a/tests/integration/test_non_default_compression/configs/deflateqpl_compression_by_default.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - 0 - 0 - - deflate_qpl - - - diff --git a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml b/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml deleted file mode 100644 index 24e101e0e3f..00000000000 --- a/tests/integration/test_non_default_compression/configs/enable_deflateqpl_codec.xml +++ 
/dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_non_default_compression/test.py b/tests/integration/test_non_default_compression/test.py index 29776eba176..bdff1d4fb20 100644 --- a/tests/integration/test_non_default_compression/test.py +++ b/tests/integration/test_non_default_compression/test.py @@ -37,19 +37,6 @@ node5 = cluster.add_instance( "configs/allow_suspicious_codecs.xml", ], ) -node6 = cluster.add_instance( - "node6", - main_configs=["configs/deflateqpl_compression_by_default.xml"], - user_configs=[ - "configs/allow_suspicious_codecs.xml", - "configs/enable_deflateqpl_codec.xml", - ], -) -node7 = cluster.add_instance( - "node7", - main_configs=["configs/allow_experimental_codecs.xml"], - user_configs=["configs/allow_suspicious_codecs.xml"], -) @pytest.fixture(scope="module") @@ -253,63 +240,3 @@ def test_uncompressed_cache_plus_zstd_codec(start_cluster): ) == "10000\n" ) - - -def test_preconfigured_deflateqpl_codec(start_cluster): - if is_arm(): - pytest.skip( - "Skipping test because it's special test for Intel code (doesn't work on ARM)" - ) - - node6.query( - """ - CREATE TABLE compression_codec_multiple_with_key ( - somedate Date CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12), DEFLATE_QPL), - id UInt64 CODEC(LZ4, ZSTD, NONE, LZ4HC, DEFLATE_QPL), - data String CODEC(ZSTD(2), LZ4HC, NONE, LZ4, LZ4, DEFLATE_QPL), - somecolumn Float64 - ) ENGINE = MergeTree() PARTITION BY somedate ORDER BY id SETTINGS index_granularity = 2; - """ - ) - node6.query( - "INSERT INTO compression_codec_multiple_with_key VALUES(toDate('2018-10-12'), 100000, 'hello', 88.88), (toDate('2018-10-12'), 100002, 'world', 99.99), (toDate('2018-10-12'), 1111, '!', 777.777)" - ) - assert ( - node6.query( - "SELECT COUNT(*) FROM compression_codec_multiple_with_key WHERE id % 2 == 0" - ) - == "2\n" - ) - assert ( - node6.query( - "SELECT DISTINCT somecolumn FROM compression_codec_multiple_with_key ORDER BY id" - ) - == "777.777\n88.88\n99.99\n" - ) - assert ( - 
node6.query( - "SELECT data FROM compression_codec_multiple_with_key WHERE id >= 1112 AND somedate = toDate('2018-10-12') AND somecolumn <= 100" - ) - == "hello\nworld\n" - ) - - node6.query( - "INSERT INTO compression_codec_multiple_with_key SELECT toDate('2018-10-12'), number, toString(number), 1.0 FROM system.numbers LIMIT 10000" - ) - - assert ( - node6.query( - "SELECT COUNT(id) FROM compression_codec_multiple_with_key WHERE id % 10 == 0" - ) - == "1001\n" - ) - assert ( - node6.query("SELECT SUM(somecolumn) FROM compression_codec_multiple_with_key") - == str(777.777 + 88.88 + 99.99 + 1.0 * 10000) + "\n" - ) - assert ( - node6.query( - "SELECT count(*) FROM compression_codec_multiple_with_key GROUP BY somedate" - ) - == "10003\n" - ) diff --git a/tests/integration/test_regexp_logger/__init__.py b/tests/integration/test_regexp_logger/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_regexp_logger/configs/log.xml b/tests/integration/test_regexp_logger/configs/log.xml new file mode 100644 index 00000000000..a85417d05b8 --- /dev/null +++ b/tests/integration/test_regexp_logger/configs/log.xml @@ -0,0 +1,6 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + + \ No newline at end of file diff --git a/tests/integration/test_regexp_logger/test.py b/tests/integration/test_regexp_logger/test.py new file mode 100644 index 00000000000..4f8a7e4be8f --- /dev/null +++ b/tests/integration/test_regexp_logger/test.py @@ -0,0 +1,74 @@ +import re + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", with_zookeeper=False, main_configs=["configs/log.xml"] +) + +original_config = """ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + + +""" + +updated_config = """ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + .*Loaded config.* + + + executeQuery + .*Read.* + .*from.* + + + + +""" + + 
+@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_regexp_pattern_update(start_cluster): + # Display config being used + node.exec_in_container(["cat", "/etc/clickhouse-server/config.d/log.xml"]) + + # Make sure that there are enough log messages for the test + for _ in range(5): + node.query("SYSTEM RELOAD CONFIG") + node.query("SELECT 1") + + assert node.contains_in_log(r".*Loaded config.*") + assert node.contains_in_log(r".*executeQuery.*Read.*") + assert node.contains_in_log(r".*executeQuery.*from.*") + + node.replace_config("/etc/clickhouse-server/config.d/log.xml", updated_config) + node.query("SYSTEM RELOAD CONFIG;") + node.rotate_logs() + + for _ in range(5): + node.query("SYSTEM RELOAD CONFIG") + node.query("SELECT 1") + + assert not node.contains_in_log(r".*Loaded config.*") + assert node.contains_in_log(r".*executeQuery.*Read.*") + assert not node.contains_in_log(r".*executeQuery.*from.*") + + node.replace_config("/etc/clickhouse-server/config.d/log.xml", original_config) diff --git a/tests/integration/test_storage_s3/s3_mocks/no_list_objects.py b/tests/integration/test_storage_s3/s3_mocks/no_list_objects.py index eec817e0eb3..df89250ff5e 100644 --- a/tests/integration/test_storage_s3/s3_mocks/no_list_objects.py +++ b/tests/integration/test_storage_s3/s3_mocks/no_list_objects.py @@ -110,7 +110,6 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.send_header(k, v) self.end_headers() self.wfile.write(r.content) - self.wfile.close() class ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 0d820334e70..a1fbf0882b6 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -2073,6 +2073,12 @@ def test_processing_threads(started_cluster): f"SELECT * FROM 
system.zookeeper WHERE path = '{keeper_path}'" ) + assert 16 == int( + node.query( + f"SELECT value FROM system.s3_queue_settings WHERE table = '{table_name}' and name = 'processing_threads_num'" + ) + ) + total_values = generate_random_files( started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 ) diff --git a/tests/queries/0_stateless/00240_replace_substring_loop.reference b/tests/queries/0_stateless/00240_replace_substring_loop.reference index e32b5448f38..390ec161dc2 100644 --- a/tests/queries/0_stateless/00240_replace_substring_loop.reference +++ b/tests/queries/0_stateless/00240_replace_substring_loop.reference @@ -190,6 +190,3 @@ __.__ o_.__ o_.__ 1 __. o_. o_. 1 __.__ o_.__ o_.__ 1 __.__ o_.__ o_.__ 1 -ABCabc -ABCabc -ABCabc diff --git a/tests/queries/0_stateless/00240_replace_substring_loop.sql b/tests/queries/0_stateless/00240_replace_substring_loop.sql index 3757cc77395..2c9157d5946 100644 --- a/tests/queries/0_stateless/00240_replace_substring_loop.sql +++ b/tests/queries/0_stateless/00240_replace_substring_loop.sql @@ -99,6 +99,3 @@ SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '.__']) AS s); SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.']) AS s); SELECT s, replaceOne(s, '_', 'o') AS a, replaceRegexpOne(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.__']) AS s); -SELECT replace('ABCabc', '', 'DEF'); -SELECT replace(materialize('ABCabc'), materialize(''), 'DEF'); -SELECT replace(materialize('ABCabc'), '', 'DEF'); diff --git a/tests/queries/0_stateless/00318_pk_tuple_order.sql b/tests/queries/0_stateless/00318_pk_tuple_order.sql index 585f35d2f3c..4eebbd74fe0 100644 --- a/tests/queries/0_stateless/00318_pk_tuple_order.sql +++ b/tests/queries/0_stateless/00318_pk_tuple_order.sql @@ -9,61 +9,61 @@ SET 
min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; SET max_block_size = 1; SET max_rows_to_read = 4; -SELECT * FROM pk WHERE x = 2 AND y = 11; +SELECT * FROM pk WHERE x = 2 AND y = 11 ORDER BY ALL; SET max_rows_to_read = 5; -SELECT * FROM pk WHERE x = 1; +SELECT * FROM pk WHERE x = 1 ORDER BY ALL; SET max_rows_to_read = 9; -SELECT * FROM pk WHERE x = 3; +SELECT * FROM pk WHERE x = 3 ORDER BY ALL; SET max_rows_to_read = 3; -SELECT * FROM pk WHERE x = 3 AND y = 44; +SELECT * FROM pk WHERE x = 3 AND y = 44 ORDER BY ALL; SET max_rows_to_read = 2; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4935; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4578; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4935 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4578 ORDER BY ALL; SET max_rows_to_read = 1; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4934; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4936; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4577; -SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4579; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4934 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4936 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4577 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 44 AND z = 4579 ORDER BY ALL; SET max_rows_to_read = 1; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z > 5786; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z > 5786 ORDER BY ALL; SET max_rows_to_read = 2; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 5786; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 5786 ORDER BY ALL; SET max_rows_to_read = 3; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z > 1235; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z > 1235 ORDER BY ALL; SET max_rows_to_read = 4; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1235; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1000; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1000 AND x < 10000; -SELECT * FROM 
pk WHERE x = 3 AND y = 55; -SELECT * FROM pk WHERE x = 3 AND y >= 50; -SELECT * FROM pk WHERE x = 3 AND y > 44; -SELECT * FROM pk WHERE x >= 3 AND y > 44; -SELECT * FROM pk WHERE x > 2 AND y > 44; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1235 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1000 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z >= 1000 AND x < 10000 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y = 55 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y >= 50 ORDER BY ALL; +SELECT * FROM pk WHERE x = 3 AND y > 44 ORDER BY ALL; +SELECT * FROM pk WHERE x >= 3 AND y > 44 ORDER BY ALL; +SELECT * FROM pk WHERE x > 2 AND y > 44 ORDER BY ALL; SET max_rows_to_read = 2; -SELECT * FROM pk WHERE x = 3 AND y = 55 AND z = 5786; +SELECT * FROM pk WHERE x = 3 AND y = 55 AND z = 5786 ORDER BY ALL; SET max_rows_to_read = 15; SET merge_tree_min_rows_for_seek = 0; -SELECT * FROM pk WHERE z = 2791; -SELECT * FROM pk WHERE z = 5786; -SELECT * FROM pk WHERE z = 1235; -SELECT * FROM pk WHERE z = 4578; +SELECT * FROM pk WHERE z = 2791 ORDER BY ALL; +SELECT * FROM pk WHERE z = 5786 ORDER BY ALL; +SELECT * FROM pk WHERE z = 1235 ORDER BY ALL; +SELECT * FROM pk WHERE z = 4578 ORDER BY ALL; SET max_rows_to_read = 10; -SELECT * FROM pk WHERE y = 11; -SELECT * FROM pk WHERE y = 22; -SELECT * FROM pk WHERE y = 33; -SELECT * FROM pk WHERE y = 44; -SELECT * FROM pk WHERE y = 55; +SELECT * FROM pk WHERE y = 11 ORDER BY ALL; +SELECT * FROM pk WHERE y = 22 ORDER BY ALL; +SELECT * FROM pk WHERE y = 33 ORDER BY ALL; +SELECT * FROM pk WHERE y = 44 ORDER BY ALL; +SELECT * FROM pk WHERE y = 55 ORDER BY ALL; DROP TABLE pk; diff --git a/tests/queries/0_stateless/00386_enum_in_pk.sql b/tests/queries/0_stateless/00386_enum_in_pk.sql index 4fc79b5ef1b..b59118ed47c 100644 --- a/tests/queries/0_stateless/00386_enum_in_pk.sql +++ b/tests/queries/0_stateless/00386_enum_in_pk.sql @@ -3,43 +3,43 @@ set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE enum_pk 
(date Date DEFAULT '0000-00-00', x Enum8('0' = 0, '1' = 1, '2' = 2), d Enum8('0' = 0, '1' = 1, '2' = 2)) ENGINE = MergeTree(date, x, 1); INSERT INTO enum_pk (x, d) VALUES ('0', '0')('1', '1')('0', '0')('1', '1')('1', '1')('0', '0')('0', '0')('2', '2')('0', '0')('1', '1')('1', '1')('1', '1')('1', '1')('0', '0'); -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE x = '0'; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE d = '0'; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE x = '0'; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE d = '0'; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE x != '0'; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE d != '0'; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE x != '0'; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE d != '0'; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE x = '1'; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE d = '1'; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE x = '1'; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE d = '1'; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE exp2(toInt64(x != '1')) > 1; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE exp2(toInt64(d != '1')) > 1; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE exp2(toInt64(x != '1')) > 1; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE exp2(toInt64(d != '1')) > 1; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE x = toString(0); -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE d = toString(0); +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE x = toString(0); +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE d = toString(0); -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE (x = toString(0)) > 0; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE (d = toString(0)) > 0; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE (x = 
toString(0)) > 0; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE (d = toString(0)) > 0; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE ((x != toString(1)) > 0) > 0; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE ((d != toString(1)) > 0) > 0; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE ((x != toString(1)) > 0) > 0; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE ((d != toString(1)) > 0) > 0; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE exp2((x != toString(0)) != 0) > 1; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE exp2((d != toString(0)) != 0) > 1; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE exp2((x != toString(0)) != 0) > 1; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE exp2((d != toString(0)) != 0) > 1; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE (-(x != toString(0)) = -1) > 0; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE (-(d != toString(0)) = -1) > 0; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE (-(x != toString(0)) = -1) > 0; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE (-(d != toString(0)) = -1) > 0; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE 1 = 1; -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE 1 = 1; +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE 1 = 1; +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE 1 = 1; -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE (x = '0' OR x = '1'); -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE (d = '0' OR d = '1'); +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE (x = '0' OR x = '1'); +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE (d = '0' OR d = '1'); -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE x IN ('0', '1'); -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE d IN ('0', '1'); +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE x IN ('0', '1'); 
+SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE d IN ('0', '1'); -SELECT cityHash64(groupArray(x)) FROM enum_pk WHERE (x != '0' AND x != '1'); -SELECT cityHash64(groupArray(d)) FROM enum_pk WHERE (d != '0' AND d != '1'); +SELECT cityHash64(groupArraySorted(100)(x)) FROM enum_pk WHERE (x != '0' AND x != '1'); +SELECT cityHash64(groupArraySorted(100)(d)) FROM enum_pk WHERE (d != '0' AND d != '1'); DROP TABLE enum_pk; diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference deleted file mode 100644 index a6e03404f2b..00000000000 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) -2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) -3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) -2 -10001 diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql deleted file mode 100644 index d8c28a7d9d7..00000000000 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.sql +++ /dev/null @@ -1,49 +0,0 @@ ---Tags: no-fasttest, no-cpu-aarch64, no-cpu-s390x --- no-fasttest because DEFLATE_QPL isn't available in fasttest --- no-cpu-aarch64 and no-cpu-s390x because DEFLATE_QPL is x86-only - --- A bunch of random DDLs to test the DEFLATE_QPL codec. - -SET enable_deflate_qpl_codec = 1; - --- Suppress test failures because stderr contains warning "Initialization of hardware-assisted DeflateQpl failed, falling --- back to software DeflateQpl coded." -SET send_logs_level = 'fatal'; - -DROP TABLE IF EXISTS compression_codec; - -CREATE TABLE compression_codec( - id UInt64 CODEC(DEFLATE_QPL), - data String CODEC(DEFLATE_QPL), - ddd Date CODEC(DEFLATE_QPL), - ddd32 Date32 CODEC(DEFLATE_QPL), - somenum Float64 CODEC(DEFLATE_QPL), - somestr FixedString(3) CODEC(DEFLATE_QPL), - othernum Int64 CODEC(DEFLATE_QPL), - somearray Array(UInt8) CODEC(DEFLATE_QPL), - somemap Map(String, UInt32) CODEC(DEFLATE_QPL), - sometuple Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL), -) ENGINE = MergeTree() ORDER BY tuple(); - -SHOW CREATE TABLE compression_codec; - -INSERT INTO compression_codec VALUES(1, 'hello', toDate('2018-12-14'), toDate32('2018-12-14'), 1.1, 'aaa', 5, [1,2,3], map('k1',1,'k2',2), tuple(1,2)); -INSERT INTO compression_codec VALUES(2, 'world', toDate('2018-12-15'), toDate32('2018-12-15'), 2.2, 'bbb', 6, [4,5,6], map('k3',3,'k4',4), tuple(3,4)); -INSERT INTO compression_codec VALUES(3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6)); - -SELECT * FROM compression_codec ORDER BY id; - -OPTIMIZE TABLE compression_codec FINAL; - -INSERT INTO 
compression_codec VALUES(2, '', toDate('2018-12-13'), toDate32('2018-12-13'), 4.4, 'ddd', 8, [10,11,12], map('k7',7,'k8',8), tuple(7,8)); - -DETACH TABLE compression_codec; -ATTACH TABLE compression_codec; - -SELECT count(*) FROM compression_codec WHERE id = 2 GROUP BY id; - -INSERT INTO compression_codec SELECT 3, '!', toDate('2018-12-16'), toDate32('2018-12-16'), 3.3, 'ccc', 7, [7,8,9], map('k5',5,'k6',6), tuple(5,6) FROM system.numbers LIMIT 10000; - -SELECT count(*) FROM compression_codec WHERE id = 3 GROUP BY id; - -DROP TABLE IF EXISTS compression_codec; diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index 2044a9b8d22..092b071cb48 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; diff --git a/tests/queries/0_stateless/01039_test_setting_parse.reference b/tests/queries/0_stateless/01039_test_setting_parse.reference index 199b64e7f4d..ec68bacef35 100644 --- a/tests/queries/0_stateless/01039_test_setting_parse.reference +++ b/tests/queries/0_stateless/01039_test_setting_parse.reference @@ -1,9 +1,9 @@ 1000000000 3221225472 -1567000 -1263616 -1567000 -1263616 +15678000 +12641280 +15678000 +12641280 12000000 32505856 1000000000000 diff --git a/tests/queries/0_stateless/01039_test_setting_parse.sql b/tests/queries/0_stateless/01039_test_setting_parse.sql index fd8580d26a5..8f2337fd801 100644 --- a/tests/queries/0_stateless/01039_test_setting_parse.sql +++ b/tests/queries/0_stateless/01039_test_setting_parse.sql @@ -2,13 +2,13 @@ SET max_memory_usage = '1G'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; SET max_memory_usage = '3Gi'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '1567k'; +SET max_memory_usage = '15678k'; 
SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '1234ki'; +SET max_memory_usage = '12345ki'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '1567K'; +SET max_memory_usage = '15678K'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; -SET max_memory_usage = '1234Ki'; +SET max_memory_usage = '12345Ki'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; SET max_memory_usage = '12M'; SELECT value FROM system.settings WHERE name = 'max_memory_usage'; diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference index 22405bf1866..a8b99666654 100644 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference @@ -1,7 +1,7 @@ 1 1 1 -1 -1 +2 +1 2 2 diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh index b70c28422c9..173cc949500 100755 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1' # push down append ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1' -# overwrite on push down (since these settings goes latest) +# not overwrite on push down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1' # on push-down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV' diff --git 
a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference index 447e1a275fc..a5133630186 100644 --- a/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference +++ b/tests/queries/0_stateless/02242_system_filesystem_cache_log_table.reference @@ -1,12 +1,18 @@ Using storage policy: s3_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +(0,808110) READ_FROM_CACHE Using storage policy: local_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +(0,808110) READ_FROM_CACHE Using storage policy: azure_cache (0,519) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE +(0,808110) READ_FROM_CACHE (0,808110) READ_FROM_FS_AND_DOWNLOADED_TO_CACHE (0,808110) READ_FROM_CACHE +(0,808110) READ_FROM_CACHE diff --git a/tests/queries/0_stateless/02411_legacy_geobase.reference b/tests/queries/0_stateless/02411_legacy_geobase.reference index 4fc360d876c..ecf8d151c25 100644 --- a/tests/queries/0_stateless/02411_legacy_geobase.reference +++ b/tests/queries/0_stateless/02411_legacy_geobase.reference @@ -284,3 +284,7 @@ Asia is in Asia [10,9,1] ['North America','America','World'] [11,1] ['Eurasia','World'] [12,11,1] ['Asia','Eurasia','World'] + +Mundo +Estados Unidos +Colorado diff --git a/tests/queries/0_stateless/02411_legacy_geobase.sql b/tests/queries/0_stateless/02411_legacy_geobase.sql index 48525bcdc4f..4e044c3f189 100644 --- a/tests/queries/0_stateless/02411_legacy_geobase.sql +++ b/tests/queries/0_stateless/02411_legacy_geobase.sql @@ -12,3 +12,4 @@ SELECT regionToName(number::UInt32, 'en'), regionToTopContinent(number::UInt32) SELECT regionToName(number::UInt32, 'en'), regionToPopulation(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); SELECT 
regionToName(n1.number::UInt32, 'en') || (regionIn(n1.number::UInt32, n2.number::UInt32) ? ' is in ' : ' is not in ') || regionToName(n2.number::UInt32, 'en') FROM numbers(13) AS n1 CROSS JOIN numbers(13) AS n2; SELECT regionHierarchy(number::UInt32) AS arr, arrayMap(id -> regionToName(id, 'en'), arr) FROM numbers(13); +SELECT regionToName(number::UInt32, 'es') FROM numbers(4); diff --git a/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference b/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference index fbc09700fe6..b7d619fb717 100644 --- a/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference +++ b/tests/queries/0_stateless/02491_part_log_has_table_uuid.reference @@ -1,4 +1,5 @@ 1 NewPart NotAMerge all_1_1_0 +1 MergePartsStart RegularMerge all_1_1_1 1 MergeParts RegularMerge all_1_1_1 1 NewPart NotAMerge all_1_1_2 1 RemovePart NotAMerge all_1_1_1 diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference index 1823b83ae28..27fb92ab556 100644 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.reference @@ -1,3 +1,3 @@ 0 -83 +85 100000 diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference index 219149f209c..52fccec1840 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -134,7 +134,7 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllo World 5 Hello World . 
x xello World -- should not throw an exception if the needle is empty +Empty needles do not throw an exception - non-const needle, const replacement Hexxo Worxd Hello World diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql index 7fd79e3b7ff..26473521b4a 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -69,7 +69,7 @@ SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle DROP TABLE IF EXISTS test_tab; -SELECT '- should not throw an exception if the needle is empty'; +SELECT 'Empty needles do not throw an exception'; CREATE TABLE test_tab (id UInt32, haystack String, needle String, replacement String) diff --git a/tests/queries/0_stateless/02539_vertical_merge_compact_parts.reference b/tests/queries/0_stateless/02539_vertical_merge_compact_parts.reference index 685d3f3140d..9327f61321d 100644 --- a/tests/queries/0_stateless/02539_vertical_merge_compact_parts.reference +++ b/tests/queries/0_stateless/02539_vertical_merge_compact_parts.reference @@ -1,2 +1,4 @@ +1 2 MergePartsStart Undecided Unknown 1 2 MergeParts Horizontal Compact +1 3 MergePartsStart Undecided Unknown 1 3 MergeParts Vertical Wide diff --git a/tests/queries/0_stateless/02539_vertical_merge_compact_parts.sql b/tests/queries/0_stateless/02539_vertical_merge_compact_parts.sql index 898a2abd6c0..ce51481440d 100644 --- a/tests/queries/0_stateless/02539_vertical_merge_compact_parts.sql +++ b/tests/queries/0_stateless/02539_vertical_merge_compact_parts.sql @@ -23,7 +23,8 @@ SELECT min_block, max_block, event_type, merge_algorithm, part_type FROM system. 
WHERE database = currentDatabase() AND table = 't_compact_vertical_merge' AND - min_block = 1 AND max_block = 2; + min_block = 1 AND max_block = 2 +ORDER BY event_time_microseconds; INSERT INTO t_compact_vertical_merge SELECT number, toString(number), range(number % 10) FROM numbers(40); @@ -37,6 +38,7 @@ SELECT min_block, max_block, event_type, merge_algorithm, part_type FROM system. WHERE database = currentDatabase() AND table = 't_compact_vertical_merge' AND - min_block = 1 AND max_block = 3; + min_block = 1 AND max_block = 3 +ORDER BY event_time_microseconds; DROP TABLE t_compact_vertical_merge; diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh index dc9cc71757e..cd93d9ddaaf 100755 --- a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh +++ b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh @@ -14,7 +14,7 @@ EOF # The following SELECT is expected to read 20 rows. In fact it may decide to read more than 20 rows, but not too many anyway. # So we'll check that the number of read rows is less than 40. 
-query="SELECT * FROM (SELECT * FROM view1) ORDER BY number DESC LIMIT 20" +query="SELECT * FROM (SELECT * FROM view1) ORDER BY number DESC LIMIT 20 SETTINGS max_streams_for_merge_tree_reading = 1" query_id=${CLICKHOUSE_DATABASE}_optimize_read_in_order_from_view_$RANDOM$RANDOM diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.reference b/tests/queries/0_stateless/02931_max_num_to_warn.reference index 419149b0bd2..edf1d55f5e4 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.reference +++ b/tests/queries/0_stateless/02931_max_num_to_warn.reference @@ -1,5 +1,5 @@ -The number of attached tables is more than 5 -The number of attached views is more than 5 -The number of attached dictionaries is more than 5 -The number of attached databases is more than 2 -The number of active parts is more than 10 +The number of attached tables is more than 5. +The number of attached views is more than 5. +The number of attached dictionaries is more than 5. +The number of attached databases is more than 2. +The number of active parts is more than 10. 
diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql index 1c96e017646..e5cbbc9155b 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.sql +++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql @@ -71,11 +71,11 @@ INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_10 VALUES (1, 'Hello INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_11 VALUES (1, 'Hello'); SELECT * FROM system.warnings where message in ( - 'The number of attached tables is more than 5', - 'The number of attached views is more than 5', - 'The number of attached dictionaries is more than 5', - 'The number of attached databases is more than 2', - 'The number of active parts is more than 10' + 'The number of attached tables is more than 5.', + 'The number of attached views is more than 5.', + 'The number of attached dictionaries is more than 5.', + 'The number of attached databases is more than 2.', + 'The number of active parts is more than 10.' 
); DROP DATABASE IF EXISTS test_max_num_to_warn_02931; diff --git a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference index abdcc960be3..a3f40f61e6a 100644 --- a/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference +++ b/tests/queries/0_stateless/02950_part_log_bytes_uncompressed.reference @@ -1,6 +1,8 @@ NewPart part_log_bytes_uncompressed all_1_1_0 1 1 MergeParts part_log_bytes_uncompressed all_1_2_1 1 1 +MergePartsStart part_log_bytes_uncompressed all_1_2_1 0 0 MutatePart part_log_bytes_uncompressed all_1_2_1_3 1 1 +MutatePartStart part_log_bytes_uncompressed all_1_2_1_3 0 0 NewPart part_log_bytes_uncompressed all_2_2_0 1 1 NewPart part_log_bytes_uncompressed all_4_4_0 1 1 RemovePart part_log_bytes_uncompressed all_4_4_0 1 1 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference index b19d389d8d0..71c2d1f5d0f 100644 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_merge_error.reference @@ -1,10 +1,12 @@ before rmt_master NewPart 0 1 rmt_master MergeParts 0 1 +rmt_master MergePartsStart 0 1 rmt_slave MergeParts 1 0 rmt_slave DownloadPart 0 1 after rmt_master NewPart 0 1 rmt_master MergeParts 0 1 +rmt_master MergePartsStart 0 1 rmt_slave MergeParts 1 0 rmt_slave DownloadPart 0 2 diff --git a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference index aac9e7527d1..0145b094875 100644 --- a/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference +++ b/tests/queries/0_stateless/03002_part_log_rmt_fetch_mutate_error.reference @@ -1,10 +1,12 @@ before rmt_master NewPart 0 1 rmt_master MutatePart 0 1 +rmt_master MutatePartStart 0 1 rmt_slave DownloadPart 0 1 
rmt_slave MutatePart 1 0 after rmt_master NewPart 0 1 rmt_master MutatePart 0 1 +rmt_master MutatePartStart 0 1 rmt_slave DownloadPart 0 2 rmt_slave MutatePart 1 0 diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 48e98798c51..3a09eec7452 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} - +select 42 settings compatibility=NULL; -- {clientError BAD_GET} diff --git a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql index 2d566e52c94..a61d0280463 100644 --- a/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql +++ b/tests/queries/0_stateless/03171_indexing_by_hilbert_curve.sql @@ -3,6 +3,8 @@ DROP TABLE IF EXISTS test_hilbert_encode_hilbert_encode; CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 8192, index_granularity_bytes = '1Mi'; INSERT INTO test_hilbert_encode SELECT number DIV 1024, number % 1024 FROM numbers(1048576); +set max_streams_for_merge_tree_reading = 1; + SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 1; SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.reference b/tests/queries/0_stateless/03172_format_settings_clauses.reference new file mode 100644 index 00000000000..8a98b137f4b --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.reference @@ -0,0 +1,14 @@ +1 +2 +1 +2 +1 +2 +1 +1 +3 +3 +3 +3 +3 +1 diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.sql 
b/tests/queries/0_stateless/03172_format_settings_clauses.sql new file mode 100644 index 00000000000..0d1aa4dcfbb --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.sql @@ -0,0 +1,30 @@ +SET max_block_size = 10, max_threads = 1; + +-- Take the following example: +SELECT 1 UNION ALL SELECT 2 FORMAT TSV; + +-- Each subquery can be put in parentheses and have its own settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV; + +-- And the whole query can have settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3; + +-- A single query with output is parsed in the same way as the UNION ALL chain: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3; + +-- So while these forms have a slightly different meaning, they both exist: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV; +SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3; + +-- And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. +-- But while this work: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3; + +-- This does not work automatically, unless we explicitly allow different orders: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV; + +-- Inevitably, we allow this: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV; +/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/ +-- Because this part is consumed into ASTSelectWithUnionQuery +-- and the rest into ASTQueryWithOutput. 
diff --git a/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.reference b/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.reference index e69de29bb2d..505692b59cd 100644 --- a/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.reference +++ b/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.reference @@ -0,0 +1,3 @@ +2999 + +2999 diff --git a/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.sql b/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.sql index b12dfc92ddf..f0499f4e211 100644 --- a/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.sql +++ b/tests/queries/0_stateless/03215_parallel_replicas_crash_after_refactoring.sql @@ -29,6 +29,6 @@ INSERT INTO 03215_parallel_replicas SELECT FROM numbers(2000, 1000); SET parallel_distributed_insert_select = 2, prefer_localhost_replica = false, enable_parallel_replicas = 1, max_parallel_replicas = 65535, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = -0., parallel_replicas_for_non_replicated_merge_tree = true; -SELECT max(k) IGNORE NULLS FROM 03215_parallel_replicas WITH TOTALS SETTINGS enable_parallel_replicas = 1, max_parallel_replicas = 65535, prefer_localhost_replica = 0, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = -0; -- { serverError 36 } +SELECT max(k) IGNORE NULLS FROM 03215_parallel_replicas WITH TOTALS SETTINGS enable_parallel_replicas = 1, max_parallel_replicas = 65535, prefer_localhost_replica = 0, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_single_task_marks_count_multiplier = -0; DROP TABLE IF EXISTS 03215_parallel_replicas; diff --git 
a/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.reference b/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.reference new file mode 100644 index 00000000000..78e743ab0f9 --- /dev/null +++ b/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.reference @@ -0,0 +1,400 @@ +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +1 120 +2 200 +2 210 +2 220 +3 300 +1 120 +1 130 +2 220 +1 110 +1 120 +2 210 +2 220 +1 110 +1 120 +2 210 +1 120 +2 210 +2 220 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +2 +2 +2 +2 +1 1 +2 1 +3 3 +1 1 +2 1 +3 3 +3 4 +1 100 +1 100 +2 200 +2 200 +3 300 +3 300 +1 100 +1 100 +1 110 +2 200 +2 200 +2 210 +3 300 +3 300 +1 110 +1 110 +2 210 +2 210 +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +2 200 +1 110 +2 200 +2 210 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +----- +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +1 120 +2 200 +2 210 +2 220 +3 300 +1 120 +1 130 +2 220 +1 110 +1 120 +2 210 +2 220 +1 110 +1 120 +2 210 +1 120 +2 210 +2 220 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +2 +2 +2 +2 +1 1 +2 1 +3 3 +1 1 +2 1 +3 3 +3 4 +1 100 +1 100 +2 200 +2 200 +3 300 +3 300 +1 100 +1 100 +1 110 +2 200 +2 200 +2 210 +3 300 +3 300 +1 110 +1 110 +2 210 +2 210 +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +2 200 +1 110 +2 200 +2 210 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +----- +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +1 120 +2 200 +2 210 +2 220 +3 300 +1 120 +1 130 +2 220 +1 110 +1 120 +2 210 +2 220 +1 110 +1 120 +2 210 +1 120 +2 210 +2 220 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +2 +2 +2 +2 +1 1 +2 1 +3 3 +1 1 +2 1 +3 3 +3 4 +1 100 +1 100 +2 200 +2 200 +3 300 +3 300 +1 100 +1 100 +1 110 +2 200 +2 200 +2 210 +3 300 +3 300 +1 110 +1 110 +2 210 +2 210 +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +2 200 +1 110 +2 200 +2 210 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +----- +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +1 120 +2 
200 +2 210 +2 220 +3 300 +1 120 +1 130 +2 220 +1 110 +1 120 +2 210 +2 220 +1 110 +1 120 +2 210 +1 120 +2 210 +2 220 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +2 +2 +2 +2 +1 1 +2 1 +3 3 +1 1 +2 1 +3 3 +3 4 +1 100 +1 100 +2 200 +2 200 +3 300 +3 300 +1 100 +1 100 +1 110 +2 200 +2 200 +2 210 +3 300 +3 300 +1 110 +1 110 +2 210 +2 210 +1 100 +1 110 +2 200 +2 210 +3 300 +1 100 +1 110 +2 200 +1 110 +2 200 +2 210 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +----- diff --git a/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.sh b/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.sh new file mode 100755 index 00000000000..4bad4abc5e7 --- /dev/null +++ b/tests/queries/0_stateless/03254_limit_by_with_offset_parallel_replicas.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT} --query=" +DROP TABLE IF EXISTS limit_by; +DROP TABLE IF EXISTS ties; +DROP TABLE IF EXISTS test_fetch; + +CREATE TABLE limit_by +( + id Int, + val Int +) +ENGINE = MergeTree +ORDER BY tuple(); + +insert into limit_by values(1, 100), (1, 110), (1, 120), (1, 130), (2, 200), (2, 210), (2, 220), (3, 300); + +CREATE TABLE ties +( + a Int +) +ENGINE = MergeTree +ORDER BY tuple(); + +INSERT INTO ties VALUES (1), (1), (2), (2), (2), (2) (3), (3); + +CREATE TABLE test_fetch(a Int32, b Int32) Engine = MergeTree ORDER BY (); + +INSERT INTO test_fetch VALUES(1, 1), (2, 1), (3, 4), (3, 3), (5, 4), (0, 6), (5, 7); +" + +for enable_analyzer in {0..1}; do + for enable_parallel_replicas in {0..1}; do + ${CLICKHOUSE_CLIENT} --query=" + set enable_analyzer=${enable_analyzer}; + set allow_experimental_parallel_reading_from_replicas=${enable_parallel_replicas}, cluster_for_parallel_replicas='parallel_replicas', max_parallel_replicas=100, parallel_replicas_for_non_replicated_merge_tree=1; + + select * from limit_by order by 
id, val limit 2 by id; + select * from limit_by order by id, val limit 3 by id; + select * from limit_by order by id, val limit 2, 2 by id; + select * from limit_by order by id, val limit 2 offset 1 by id; + select * from limit_by order by id, val limit 1, 2 by id limit 3; + select * from limit_by order by id, val limit 1, 2 by id limit 3 offset 1; + + SELECT a FROM ties order by a limit 1 with ties; + SELECT a FROM ties order by a limit 1, 2 with ties; + SELECT a FROM ties order by a limit 2, 3 with ties; + SELECT a FROM ties order by a limit 4 with ties; + + SELECT * FROM (SELECT * FROM test_fetch ORDER BY a, b OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY) ORDER BY a, b; + SELECT * FROM (SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS WITH TIES) ORDER BY a, b; + + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 2 by id; + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 3 by id; + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 2, 2 by id; + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 2 offset 1 by id; + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 1, 2 by id limit 3; + select * from remote('127.0.0.{1,2}', currentDatabase(), limit_by) order by id, val limit 1, 2 by id limit 3 offset 1; + + SELECT a from remote('127.0.0.{1,2}', currentDatabase(), ties) order by a limit 1 with ties; + SELECT a from remote('127.0.0.{1,2}', currentDatabase(), ties) order by a limit 1, 2 with ties; + SELECT a from remote('127.0.0.{1,2}', currentDatabase(), ties) order by a limit 2, 3 with ties; + SELECT a from remote('127.0.0.{1,2}', currentDatabase(), ties) order by a limit 4 with ties; + + SELECT '-----'; + " + done +done + +${CLICKHOUSE_CLIENT} --query=" +DROP TABLE limit_by; +DROP TABLE ties; +DROP TABLE test_fetch; +" diff --git 
a/tests/queries/0_stateless/03254_merge_source_parts.reference b/tests/queries/0_stateless/03254_merge_source_parts.reference new file mode 100644 index 00000000000..629479b40a2 --- /dev/null +++ b/tests/queries/0_stateless/03254_merge_source_parts.reference @@ -0,0 +1 @@ +1 2 2 1 diff --git a/tests/queries/0_stateless/03254_merge_source_parts.sql b/tests/queries/0_stateless/03254_merge_source_parts.sql new file mode 100644 index 00000000000..3324cdf2aa0 --- /dev/null +++ b/tests/queries/0_stateless/03254_merge_source_parts.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt8) ORDER BY x; +INSERT INTO test VALUES (1); +INSERT INTO test VALUES (2); +OPTIMIZE TABLE test FINAL; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['Merge'], ProfileEvents['MergeSourceParts'], ProfileEvents['MergedRows'], ProfileEvents['MergedColumns'] FROM system.part_log WHERE database = currentDatabase() AND table = 'test' AND event_type = 'MergeParts'; +DROP TABLE test; diff --git a/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.reference b/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.reference new file mode 100644 index 00000000000..1affee4ff19 --- /dev/null +++ b/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.reference @@ -0,0 +1,3 @@ +{"finalizeAggregation(x)":"1","finalizeAggregation(y)":"1","finalizeAggregation(z)":"1"} +1 2 +2 1 diff --git a/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.sql b/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.sql new file mode 100644 index 00000000000..29eb6549f04 --- /dev/null +++ b/tests/queries/0_stateless/03254_normalize_aggregate_states_with_named_tuple_args.sql @@ -0,0 +1,33 @@ +SET enable_analyzer = 1; +SET enable_named_columns_in_function_tuple = 1; + +SELECT + * APPLY finalizeAggregation +FROM +( + WITH + (1, 2)::Tuple(a int, b int) AS nt + SELECT + 
uniqState(nt)::AggregateFunction(uniq, Tuple(int, int)) x, + uniqState([nt])::AggregateFunction(uniq, Array(Tuple(int, int))) y, + uniqState(map(nt, nt))::AggregateFunction(uniq, Map(Tuple(int, int), Tuple(int, int))) z +) +FORMAT JSONEachRow; + +DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS users2; +DROP TABLE IF EXISTS test_mv; + +CREATE TABLE users (id UInt8, city String, name String) ENGINE=Memory; +CREATE TABLE users2 (id UInt8, city_name_uniq AggregateFunction(uniq, Tuple(String,String))) ENGINE=AggregatingMergeTree() ORDER BY (id); +CREATE MATERIALIZED VIEW test_mv TO users2 AS SELECT id, uniqState((city, name)) AS city_name_uniq FROM users GROUP BY id; + +INSERT INTO users VALUES (1, 'London', 'John'); +INSERT INTO users VALUES (1, 'Berlin', 'Ksenia'); +INSERT INTO users VALUES (2, 'Paris', 'Alice'); + +SELECT id, uniqMerge(city_name_uniq) FROM users2 GROUP BY id ORDER BY id; + +DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS users2; +DROP TABLE IF EXISTS test_mv; diff --git a/tests/queries/0_stateless/03254_part_log_partition_column_is_set.reference b/tests/queries/0_stateless/03254_part_log_partition_column_is_set.reference new file mode 100644 index 00000000000..c355e3b06a6 --- /dev/null +++ b/tests/queries/0_stateless/03254_part_log_partition_column_is_set.reference @@ -0,0 +1,3 @@ +1 1 11 +1 2 12 +0 diff --git a/tests/queries/0_stateless/03254_part_log_partition_column_is_set.sql b/tests/queries/0_stateless/03254_part_log_partition_column_is_set.sql new file mode 100644 index 00000000000..32cab8f8dd8 --- /dev/null +++ b/tests/queries/0_stateless/03254_part_log_partition_column_is_set.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt8, y UInt8, z String DEFAULT toString(x)) PARTITION BY x ORDER BY x; +INSERT INTO test (x, y) VALUES (1, 1); +INSERT INTO test (x, y) VALUES (1, 2); +OPTIMIZE TABLE test FINAL; +INSERT INTO test (x, y) VALUES (2, 1); +ALTER TABLE test DROP PARTITION 2; +SET mutations_sync = 1; +ALTER TABLE 
test UPDATE z = x || y WHERE 1; +SELECT * FROM test ORDER BY ALL; +TRUNCATE TABLE test; +DROP TABLE test SYNC; +SYSTEM FLUSH LOGS; + +-- SELECT * FROM system.part_log WHERE database = currentDatabase() FORMAT Vertical; +SELECT DISTINCT throwIf(empty(partition)) FROM system.part_log WHERE database = currentDatabase(); diff --git a/tests/queries/0_stateless/03254_trivial_merge_selector.reference b/tests/queries/0_stateless/03254_trivial_merge_selector.reference new file mode 100644 index 00000000000..f0559c36247 --- /dev/null +++ b/tests/queries/0_stateless/03254_trivial_merge_selector.reference @@ -0,0 +1,101 @@ +1 +all_1_1_0 +1 +2 +all_1_1_0 +all_2_2_0 +1 +2 +3 +all_1_1_0 +all_2_2_0 +all_3_3_0 +1 +2 +3 +4 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +1 +2 +3 +4 +5 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +all_5_5_0 +1 +2 +3 +4 +5 +6 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +all_5_5_0 +all_6_6_0 +1 +2 +3 +4 +5 +6 +7 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +all_5_5_0 +all_6_6_0 +all_7_7_0 +1 +2 +3 +4 +5 +6 +7 +8 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +all_5_5_0 +all_6_6_0 +all_7_7_0 +all_8_8_0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +all_1_1_0 +all_2_2_0 +all_3_3_0 +all_4_4_0 +all_5_5_0 +all_6_6_0 +all_7_7_0 +all_8_8_0 +all_9_9_0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +all_1_10_1 diff --git a/tests/queries/0_stateless/03254_trivial_merge_selector.sh b/tests/queries/0_stateless/03254_trivial_merge_selector.sh new file mode 100755 index 00000000000..80059acb85c --- /dev/null +++ b/tests/queries/0_stateless/03254_trivial_merge_selector.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# This is a smoke test, it proves that the Trivial merge selector exists and does something. 
+ +${CLICKHOUSE_CLIENT} --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS merge_selector_algorithm = 'Trivial'; +INSERT INTO test VALUES (1); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (2); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (3); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (4); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (5); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (6); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (7); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (8); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (9); +SELECT x FROM test ORDER BY x; +SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase(); +INSERT INTO test VALUES (10); +SELECT x FROM test ORDER BY x; +OPTIMIZE TABLE test; +" + +while true +do + ${CLICKHOUSE_CLIENT} --query "SELECT name FROM system.parts WHERE active AND table = 'test' AND database = currentDatabase() AND name = 'all_1_10_1'" | grep . 
&& break + sleep 0.1 +done + +${CLICKHOUSE_CLIENT} --query " +DROP TABLE test; +" diff --git a/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.reference b/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.reference new file mode 100644 index 00000000000..771c05369c1 --- /dev/null +++ b/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.reference @@ -0,0 +1 @@ +7992019 diff --git a/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.sql b/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.sql new file mode 100644 index 00000000000..3237818d290 --- /dev/null +++ b/tests/queries/0_stateless/03254_uniq_exact_two_level_negative_zero.sql @@ -0,0 +1 @@ +WITH number % 1000 = 0 ? (rand() % 2 ? 0.0 : -0.0) : number::Float64 AS x SELECT length(uniqExactState(x)::String) FROM numbers(1000000); diff --git a/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.reference b/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.reference new file mode 100644 index 00000000000..660df8758c0 --- /dev/null +++ b/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.reference @@ -0,0 +1,45 @@ +1 1 11 +1 2 12 +Row 1: +────── +event_type: MergePartsStart +merge_reason: RegularMerge +table: test +part_name: 1_1_2_1 +partition_id: 1 +partition: 1 +rows: 0 +merged_from: ['1_1_1_0','1_2_2_0'] + +Row 2: +────── +event_type: MergeParts +merge_reason: RegularMerge +table: test +part_name: 1_1_2_1 +partition_id: 1 +partition: 1 +rows: 2 +merged_from: ['1_1_1_0','1_2_2_0'] + +Row 3: +────── +event_type: MutatePartStart +merge_reason: NotAMerge +table: test +part_name: 1_1_2_1_4 +partition_id: 1 +partition: 1 +rows: 0 +merged_from: ['1_1_2_1'] + +Row 4: +────── +event_type: MutatePart +merge_reason: NotAMerge +table: test +part_name: 1_1_2_1_4 +partition_id: 1 +partition: 1 +rows: 2 +merged_from: ['1_1_2_1'] diff --git 
a/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.sql b/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.sql new file mode 100644 index 00000000000..7bd44e6a50d --- /dev/null +++ b/tests/queries/0_stateless/03255_merge_mutation_start_entry_in_the_part_log.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt8, y UInt8, z String DEFAULT toString(x)) PARTITION BY x ORDER BY x; +INSERT INTO test (x, y) VALUES (1, 1); +INSERT INTO test (x, y) VALUES (1, 2); +OPTIMIZE TABLE test FINAL; +INSERT INTO test (x, y) VALUES (2, 1); +ALTER TABLE test DROP PARTITION 2; +SET mutations_sync = 1; +ALTER TABLE test UPDATE z = x || y WHERE 1; +SELECT * FROM test ORDER BY ALL; +TRUNCATE TABLE test; +DROP TABLE test SYNC; +SYSTEM FLUSH LOGS; + +SELECT event_type, merge_reason, table, part_name, partition_id, partition, rows, merged_from +FROM system.part_log WHERE database = currentDatabase() AND event_type IN ('MergePartsStart', 'MergeParts', 'MutatePartStart', 'MutatePart') +ORDER BY event_time_microseconds FORMAT Vertical; diff --git a/tests/queries/0_stateless/03256_merges.reference b/tests/queries/0_stateless/03256_merges.reference new file mode 100644 index 00000000000..bdf863349aa --- /dev/null +++ b/tests/queries/0_stateless/03256_merges.reference @@ -0,0 +1 @@ +Merges Visualizer diff --git a/tests/queries/0_stateless/03256_merges.sh b/tests/queries/0_stateless/03256_merges.sh new file mode 100755 index 00000000000..867d1a99ec4 --- /dev/null +++ b/tests/queries/0_stateless/03256_merges.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -s "${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/merges" 2>/dev/null | grep -oF --max-count 1 'Merges Visualizer' diff --git a/utils/c++expr b/utils/c++expr index 8cf5d3a3b16..9196947f778 100755 --- a/utils/c++expr +++ b/utils/c++expr @@ -55,7 +55,7 @@ KEEP_WORKTREE=0 # if [ "$1" == "--help" ] || [ -z "$1" ]; then usage; fi -while getopts "vc:CIi:l:bkB:t:o:O:g:" OPT; do +while getopts "vc:CIi:l:b:kB:t:o:O:g:" OPT; do case "$OPT" in v) set -x; ;; c) CXX="$OPTARG"; ;; diff --git a/utils/check-style/check-settings-style b/utils/check-style/check-settings-style index 460277b0277..70a954c98bd 100755 --- a/utils/check-style/check-settings-style +++ b/utils/check-style/check-settings-style @@ -3,81 +3,81 @@ # Fast check of all the setting struct usages # The linker does not complain about incorrect extern usage, so we need to make sure the style checker handles -LC_ALL="en_US.UTF-8" +# We want traditional order so it takes underscore into account. 
With UTF-8 this is considered sorted: +# disk_connections_warn_limit UInt64 +# disk Float +# disk_move_retries_during_init UInt64 +# disk_move_retries_wait_ms UInt64 +# disk String +export LC_COLLATE="C" ROOT_PATH=$(git rev-parse --show-toplevel) -# Duplicated or incorrect setting declarations SETTINGS_FILE=$(mktemp) +trap 'rm ${SETTINGS_FILE}' EXIT + +# Please note that ALL FILES MUST BE NAMED {}Settings and that must also be EXACTLY the class name ALL_DECLARATION_FILES=" - $ROOT_PATH/src/Core/Settings.cpp - $ROOT_PATH/src/Core/ServerSettings.cpp - $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp - $ROOT_PATH/src/Coordination/CoordinationSettings.cpp - $ROOT_PATH/src/Databases/DatabaseReplicatedSettings.cpp - $ROOT_PATH/src/Storages/TimeSeries/TimeSeriesSettings.cpp - $ROOT_PATH/src/Storages/RocksDB/RocksDBSettings.cpp - $ROOT_PATH/src/Storages/RabbitMQ/RabbitMQSettings.cpp - $ROOT_PATH/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp - $ROOT_PATH/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp - $ROOT_PATH/src/Storages/MaterializedView/RefreshSettings.cpp - $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h" + $ROOT_PATH/src/Core/FormatFactorySettings.h + $ROOT_PATH/src/Core/Settings.cpp + $ROOT_PATH/src/Core/ServerSettings.cpp + $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp + $ROOT_PATH/src/Coordination/CoordinationSettings.cpp + $ROOT_PATH/src/Databases/DatabaseReplicatedSettings.cpp + $ROOT_PATH/src/Storages/TimeSeries/TimeSeriesSettings.cpp + $ROOT_PATH/src/Storages/RocksDB/RocksDBSettings.cpp + $ROOT_PATH/src/Storages/RabbitMQ/RabbitMQSettings.cpp + $ROOT_PATH/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp + $ROOT_PATH/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp + $ROOT_PATH/src/Storages/MaterializedView/RefreshSettings.cpp + $ROOT_PATH/src/Storages/NATS/NATSSettings.cpp + $ROOT_PATH/src/Storages/Kafka/KafkaSettings.cpp + $ROOT_PATH/src/Storages/Hive/HiveSettings.cpp + 
$ROOT_PATH/src/Storages/FileLog/FileLogSettings.cpp + $ROOT_PATH/src/Storages/Distributed/DistributedSettings.cpp + $ROOT_PATH/src/Storages/SetSettings.cpp + $ROOT_PATH/src/Storages/MemorySettings.cpp + $ROOT_PATH/src/Storages/ExecutableSettings.cpp + $ROOT_PATH/src/Storages/MySQL/MySQLSettings.cpp + $ROOT_PATH/src/Databases/MySQL/MaterializedMySQLSettings.cpp +" + +# We create an initial file with the shape {setting_name} {ClassName}{Type} SettingsDeclaration +# We will use SettingsDeclaration to differentiate between setting declaration and usage +function add_setting_declaration_file() +{ + if ! [ -f "$1" ]; then + echo "File '$1' does not exist." + fi + filename=$(basename -- "$1") + filename="${filename%.*}" + grep "DECLARE(" "$1" | awk -vfilename="${filename}" '{print substr($2, 0, length($2) - 1) " " filename substr($1, 9, length($1) - 9) " SettingsDeclaration" }' | sort | uniq >> "${SETTINGS_FILE}" +} for settings_file in ${ALL_DECLARATION_FILES}; do - if ! [ -f "${settings_file}" ]; then - echo "File '${settings_file}' does not exist." 
- fi + add_setting_declaration_file "$settings_file" done -cat $ROOT_PATH/src/Core/Settings.cpp $ROOT_PATH/src/Core/FormatFactorySettingsDeclaration.h | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " Settings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq > ${SETTINGS_FILE} -cat $ROOT_PATH/src/Core/ServerSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " ServerSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/MergeTree/MergeTreeSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " MergeTreeSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Coordination/CoordinationSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " CoordinationSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Databases/DatabaseReplicatedSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " DatabaseReplicatedSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/TimeSeries/TimeSeriesSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " TimeSeriesSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/RocksDB/RocksDBSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " RocksDBSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/RabbitMQ/RabbitMQSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " RabbitMQSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.cpp | grep " M(" | awk '{print 
substr($2, 0, length($2) - 1) " MaterializedPostgreSQLSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " ObjectStorageQueueSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} -cat $ROOT_PATH/src/Storages/MaterializedView/RefreshSettings.cpp | grep " M(" | awk '{print substr($2, 0, length($2) - 1) " RefreshSettings" substr($1, 3, length($1) - 3) " SettingsDeclaration" }' | sort | uniq >> ${SETTINGS_FILE} - - # Check that if there are duplicated settings (declared in different objects) they all have the same type (it's simpler to validate style with that assert) -for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | \ - sed -e 's/CoordinationSettings//g' \ - -e 's/DatabaseReplicatedSettings//g' \ - -e 's/TimeSeriesSettings//g' \ - -e 's/RabbitMQSettings//g' \ - -e 's/RocksDBSettings//g' \ - -e 's/MaterializedPostgreSQLSettings//g' \ - -e 's/ObjectStorageQueueSettings//g' \ - -e 's/RefreshSettings//g' \ - -e 's/MergeTreeSettings//g' \ - -e 's/ServerSettings//g' \ - -e 's/Settings//g' | \ - sort | uniq | awk '{ print $1 }' | uniq -d); -do - echo "# Found multiple definitions of setting ${setting} with different types: " - grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > " $0 }' -done +# Disabled because fixing this requires changing types of existing settings, and it's not as simple as just changing it as compatibility with +# previous releases is more important +#for setting in $( +# awk '{ gsub(/^.*Settings/, "", $2); print $1 " " $2}' "${SETTINGS_FILE}" | \ +# sort | uniq | awk '{ print $1 }' | uniq -d +# ); +#do +# echo "# Found multiple definitions of setting ${setting} with different types: " +# grep --line-number " ${setting}," ${ALL_DECLARATION_FILES} | awk '{print " > " $0 }' +#done # We append 
all uses of extern found in implementation files to validate them in a single pass and avoid reading the same files over and over -find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | \ - xargs grep -e "^\s*extern const Settings" \ - -e "^\s**extern const ServerSettings" \ - -e "^\s**extern const MergeTreeSettings" \ - -e "^\s**extern const RabbitMQSettings" \ - -e "^\s**extern const RocksDBSettings" \ - -e "^\s**extern const MaterializedPostgreSQLSettings" \ - -e "^\s**extern const ObjectStorageQueueSettings" \ - -e "^\s**extern const RefreshSettings" \ - -e "^\s**extern const TimeSeriesSettings" \ - -e "^\s**extern const DatabaseReplicatedSettings" \ - -e "^\s**extern const CoordinationSettings" -T | \ - awk '{print substr($5, 0, length($5) -1) " " $4 " " substr($1, 0, length($1) - 1)}' >> ${SETTINGS_FILE} +# Note that rg outputs 'path:$line', so we replace ':' with a space and then reorder to have "$setting $type $path" +find "$ROOT_PATH"/{src,base,programs,utils} \( -name '*.cpp' -o -name '*.h' \) -print0 | \ + xargs -0 rg "^\s*extern const .*Settings" | tr ':' ' ' | \ + awk '{print substr($5, 0, length($5) -1) " " $4 " " $1}' >> "${SETTINGS_FILE}" -# Duplicate extern declarations for settings -awk '{if (seen[$0]++) print $3 " -> " $1 ;}' ${SETTINGS_FILE} | while read line; +# Detect duplicate extern declarations for settings (harmless but better style) +awk '{if (seen[$0]++) print $3 " -> " $1 ;}' "${SETTINGS_FILE}" | while read -r line; do echo "# Found duplicated setting declaration in: $line" done @@ -85,31 +85,22 @@ done # Find missing declarations (obsolete settings being used) # Note that SettingsDeclaration are first in the file # Disabled for now pending fixing the code -#awk '{print $1 " " $3}' ${SETTINGS_FILE} | awk '{if (!seen[$1]++) print $0}' | grep -v SettingsDeclaration | while read setting; +#awk '{print $1 " " $3}' "${SETTINGS_FILE}" | awk '{if (!seen[$1]++) print $0}' | grep -v SettingsDeclaration | while read -r 
setting; #do -# echo "Could not find setting (maybe obsolete but used?) $setting" +# echo "Could not find setting (maybe obsolete?) $setting" #done # Look for settings declared with multiple types -for setting in $(awk '{print $1 " " $2}' ${SETTINGS_FILE} | \ - sed -e 's/MergeTreeSettings//g' \ - -e 's/ServerSettings//g' \ - -e 's/CoordinationSettings//g' \ - -e 's/TimeSeriesSettings//g' \ - -e 's/RabbitMQSettings//g' \ - -e 's/RefreshSettings//g' \ - -e 's/RocksDBSettings//g' \ - -e 's/MaterializedPostgreSQLSettings//g' \ - -e 's/ObjectStorageQueueSettings//g' \ - -e 's/DatabaseReplicatedSettings//g' \ - -e 's/Settings//g' | \ - sort | uniq | awk '{ print $1 }' | sort | uniq -d); +# This works based on the fact that if the setting declaration and usage have different types then the pair +# won't be unique +for setting in $( + awk '{ gsub(/^.*Settings/, "", $2); print $1 " " $2}' "${SETTINGS_FILE}" | \ + sort | uniq | awk '{ print $1 }' | uniq -d + ); do - expected=$(grep "^$setting " ${SETTINGS_FILE} | grep SettingsDeclaration | awk '{ print $2 }') - grep "^$setting " ${SETTINGS_FILE} | grep -v " $expected" | awk '{ print $3 " found setting " $1 " with type " $2 }' | while read line; + expected=$(grep "^$setting " "${SETTINGS_FILE}" | grep SettingsDeclaration | awk '{ print $2 }') + grep "^$setting " "${SETTINGS_FILE}" | grep -v " $expected" | awk '{ print $3 " found setting \"" $1 "\" with type " $2 }' | while read -r line; do echo "# In $line but it should be ${expected/$'\n'/ }" done done - -rm ${SETTINGS_FILE} diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 9314efa6f90..e15d4ef92cc 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -15,7 +15,7 @@ LC_ALL="en_US.UTF-8" ROOT_PATH=$(git rev-parse --show-toplevel) EXCLUDE='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/.*.cpp|utils/keeper-bench/example.yaml' 
-EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettingsDeclaration\.h' +EXCLUDE_DOCS='Settings\.cpp|FormatFactorySettings\.h' # From [1]: # But since array_to_string_internal() in array.c still loops over array