From 7e6cd00cee84394a6d2ead085fcb7b30acfca2da Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Mon, 15 Feb 2021 09:31:35 +0300 Subject: [PATCH 001/125] Try to switch to llvm-12/clang-12 --- docker/builder/build.sh | 2 +- docker/packager/binary/Dockerfile | 7 +- docker/packager/deb/Dockerfile | 7 +- docker/packager/packager | 1 + docker/test/codebrowser/Dockerfile | 2 +- docker/test/fuzzer/run-fuzzer.sh | 2 +- docs/en/development/build.md | 10 +-- tests/ci/ci_config.json | 110 ++++++++++++++--------------- 8 files changed, 76 insertions(+), 65 deletions(-) diff --git a/docker/builder/build.sh b/docker/builder/build.sh index d4cf662e91b..7c7a8893751 100755 --- a/docker/builder/build.sh +++ b/docker/builder/build.sh @@ -4,7 +4,7 @@ set -e #ccache -s # uncomment to display CCache statistics mkdir -p /server/build_docker cd /server/build_docker -cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-11)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-11)" +cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-12)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-12)" # Set the number of build jobs to the half of number of virtual CPU cores (rounded up). # By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time. diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 91036d88d8c..e8071c79a50 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,7 +1,7 @@ # docker build -t yandex/clickhouse-binary-builder . FROM ubuntu:20.04 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12 RUN apt-get update \ && apt-get install \ @@ -57,6 +57,11 @@ RUN cat /etc/resolv.conf \ lld-11 \ llvm-11 \ llvm-11-dev \ + clang-12 \ + clang-tidy-12 \ + lld-12 \ + llvm-12 \ + llvm-12-dev \ libicu-dev \ libreadline-dev \ ninja-build \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 8fd89d60f85..42a55ab72bd 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,7 +1,7 @@ # docker build -t yandex/clickhouse-deb-builder . 
FROM ubuntu:20.04 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ @@ -36,6 +36,11 @@ RUN apt-get update \ && apt-get install \ gcc-9 \ g++-9 \ + clang-12 \ + clang-tidy-12 \ + lld-12 \ + llvm-12 \ + llvm-12-dev \ clang-11 \ clang-tidy-11 \ lld-11 \ diff --git a/docker/packager/packager b/docker/packager/packager index 65c03cc10e3..a681086f955 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -184,6 +184,7 @@ if __name__ == "__main__": parser.add_argument("--build-type", choices=("debug", ""), default="") parser.add_argument("--compiler", choices=("clang-10", "clang-10-darwin", "clang-10-aarch64", "clang-10-freebsd", "clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd", + "clang-12", "clang-12-darwin", "clang-12-aarch64", "clang-12-freebsd", "gcc-9", "gcc-10"), default="gcc-9") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--unbundled", action="store_true") diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index e03f94a85e0..8f6c760c4a0 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -22,7 +22,7 @@ ENV SHA=nosha ENV DATA="data" CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ - cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \ + cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-12 -DCMAKE_C_COMPILER=/usr/bin/clang-12 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \ mkdir -p $HTML_RESULT_DIRECTORY && \ $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA && \ cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index e21f9efae66..b62c573f586 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -12,7 +12,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-11_debug_none_bundled_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-12_debug_none_bundled_unsplitted_disable_False_binary"} function clone { diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 3181f26800d..cc184e0302a 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -23,7 +23,7 @@ $ sudo apt-get install git cmake python ninja-build Or cmake3 instead of cmake on older systems. -### Install clang-11 (recommended) {#install-clang-11} +### Install clang-12 (recommended) {#install-clang-12} On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/)) @@ -33,16 +33,16 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html). 
-#### Use clang-11 for Builds {#use-gcc-10-for-builds} +#### Use clang-12 for Builds {#use-clang-12-for-builds} ``` bash -$ export CC=clang-11 -$ export CXX=clang++-11 +$ export CC=clang-12 +$ export CXX=clang++-12 ``` ### Install GCC 10 {#install-gcc-10} -We recommend building ClickHouse with clang-11, GCC-10 also supported, but it is not used for production builds. +We recommend building ClickHouse with clang-12, GCC-10 also supported, but it is not used for production builds. If you want to use GCC-10 there are several ways to install it. diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 0e467319285..703cdc10fed 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -32,7 +32,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "address", "package-type": "deb", @@ -42,7 +42,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "undefined", "package-type": "deb", @@ -52,7 +52,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "thread", "package-type": "deb", @@ -62,7 +62,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "memory", "package-type": "deb", @@ -82,7 +82,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "debug", "sanitizer": "", "package-type": "deb", @@ -102,7 +102,7 @@ "with_coverage": false }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -114,7 +114,7 @@ ], "special_build_config": [ { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "debug", "sanitizer": "", "package-type": "deb", @@ -124,7 +124,7 @@ "with_coverage": true }, { - "compiler": "clang-11", + "compiler": "clang-12", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -134,7 +134,7 @@ "with_coverage": false }, { - "compiler": "clang-11-darwin", + "compiler": "clang-12-darwin", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -144,7 +144,7 @@ "with_coverage": false }, { - "compiler": "clang-11-aarch64", + "compiler": "clang-12-aarch64", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -154,7 +154,7 @@ "with_coverage": false }, { - "compiler": "clang-11-freebsd", + "compiler": "clang-12-freebsd", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -167,7 +167,7 @@ "tests_config": { "Functional stateful tests (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -179,7 +179,7 @@ }, "Functional stateful tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -191,7 +191,7 @@ }, "Functional stateful tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -203,7 
+203,7 @@ }, "Functional stateful tests (ubsan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -215,7 +215,7 @@ }, "Functional stateful tests (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -227,7 +227,7 @@ }, "Functional stateless tests (ANTLR debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -239,7 +239,7 @@ }, "Functional stateful tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -251,7 +251,7 @@ }, "Functional stateful tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -275,7 +275,7 @@ }, "Functional stateless tests (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -287,7 +287,7 @@ }, "Functional stateless tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -299,7 +299,7 @@ }, "Functional stateless tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -311,7 +311,7 @@ }, "Functional stateless tests (ubsan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -323,7 +323,7 @@ }, "Functional stateless tests (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -335,7 +335,7 @@ }, "Functional stateless tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -347,7 +347,7 @@ }, "Functional stateless tests (pytest)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -371,7 +371,7 @@ }, "Functional stateless tests (release, wide parts enabled)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -383,7 +383,7 @@ }, "Functional stateless tests (release, DatabaseOrdinary)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -407,7 +407,7 @@ }, "Stress test (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -419,7 +419,7 @@ }, "Stress test (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", 
"build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -431,7 +431,7 @@ }, "Stress test (undefined)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -443,7 +443,7 @@ }, "Stress test (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -455,7 +455,7 @@ }, "Stress test (debug)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "debug", "sanitizer": "none", @@ -467,7 +467,7 @@ }, "Integration tests (asan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -479,7 +479,7 @@ }, "Integration tests (thread)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -491,7 +491,7 @@ }, "Integration tests (release)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -503,7 +503,7 @@ }, "Integration tests (memory)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -515,7 +515,7 @@ }, "Integration tests flaky check (asan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -527,7 +527,7 @@ }, "Compatibility check": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -539,7 +539,7 @@ }, "Split build smoke test": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -551,7 +551,7 @@ }, "Testflows check": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -575,7 +575,7 @@ }, "Unit tests release clang": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -587,7 +587,7 @@ }, "Unit tests ASAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -599,7 +599,7 @@ }, "Unit tests MSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -611,7 +611,7 @@ }, "Unit tests TSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -623,7 +623,7 @@ }, "Unit tests UBSAN": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -635,7 +635,7 @@ }, "AST fuzzer (debug)": { "required_build_properties": { - 
"compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "debug", "sanitizer": "none", @@ -647,7 +647,7 @@ }, "AST fuzzer (ASan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "address", @@ -659,7 +659,7 @@ }, "AST fuzzer (MSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "memory", @@ -671,7 +671,7 @@ }, "AST fuzzer (TSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "thread", @@ -683,7 +683,7 @@ }, "AST fuzzer (UBSan)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "undefined", @@ -695,7 +695,7 @@ }, "Release": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -707,7 +707,7 @@ }, "Functional stateless tests flaky check (address)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "address", From aabf5307c417a4757470bfff1da1ddb428ef3773 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 30 Apr 2021 13:26:13 +0300 Subject: [PATCH 002/125] more --- docker/test/keeper-jepsen/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 352585e16e3..8d31b5b7f1c 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-12_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} From cf277a67846b909dff43d09060e27a75c585ad6f Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 30 Apr 2021 17:55:38 +0300 Subject: [PATCH 003/125] find llvm --- cmake/find/llvm.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index e0ba1d9b039..0025cc0f9d3 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -26,7 +26,7 @@ endif () if (NOT USE_INTERNAL_LLVM_LIBRARY) set (LLVM_PATHS "/usr/local/lib/llvm") - foreach(llvm_v 10 9 8) + foreach(llvm_v 12 11) if (NOT LLVM_FOUND) find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS}) endif () From a4e6a96c8243d8a50907f6d831c2ff91d0477516 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 30 Apr 2021 19:02:23 +0300 Subject: [PATCH 004/125] fasttest change --- docker/test/fasttest/Dockerfile | 2 +- docker/test/fasttest/run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 2864f7fc4da..0c7e2af6ec6 100644 --- 
a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,7 +1,7 @@ # docker build -t yandex/clickhouse-fasttest . FROM ubuntu:20.04 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a7cc398e5c9..d7bf73f4755 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-11} +export LLVM_VERSION=${LLVM_VERSION:-12} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about From 366a7fe45bbfdbfa6f6ad07bafe293054500c0b5 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 12 May 2021 18:24:27 +0300 Subject: [PATCH 005/125] linker path required to be specific one --- docker/packager/binary/build.sh | 22 ++++++++++++++++------ docker/packager/packager | 5 ++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index cf74105fbbb..d746aed76ed 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -2,14 +2,23 @@ set -x -e -mkdir -p build/cmake/toolchain/darwin-x86_64 -tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 +if [ "1" == "${IS_CROSS_DARWIN:0}" ] +then + mkdir -p build/cmake/toolchain/darwin-x86_64 + tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 +fi -mkdir -p build/cmake/toolchain/linux-aarch64 -tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 +if [ "1" == "${IS_CROSS_ARM:0}" ] +then + mkdir -p build/cmake/toolchain/linux-aarch64 + tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 +fi -mkdir -p build/cmake/toolchain/freebsd-x86_64 -tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 +if [ "1" == "${IS_CROSS_ARM:0}" ] +then + mkdir -p build/cmake/toolchain/freebsd-x86_64 + tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 +fi # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. @@ -21,6 +30,7 @@ cd build/build_docker rm -f CMakeCache.txt # Read cmake arguments into array (possibly empty) read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" +env cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. ccache --show-config ||: diff --git a/docker/packager/packager b/docker/packager/packager index 9b7692b57ae..6c9cfcc7a1a 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -73,9 +73,10 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ # Explicitly use LLD with Clang by default. # Don't force linker for cross-compilation. 
if is_clang and not is_cross_compile: - cmake_flags.append("-DLINKER_NAME=lld") + cmake_flags.append("-DLINKER_NAME=ld.lld") if is_cross_darwin: + result.append("IS_CROSS_DARWIN=1") cc = compiler[:-len(DARWIN_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar") cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool") @@ -83,9 +84,11 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append("-DLINKER_NAME=/cctools/bin/x86_64-apple-darwin-ld") cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake") elif is_cross_arm: + result.append("IS_CROSS_ARM=1") cc = compiler[:-len(ARM_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") elif is_cross_freebsd: + result.append("IS_CROSS_FREEBSD=1") cc = compiler[:-len(FREEBSD_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake") else: From 783e9b3c1c440787a4a10a6f1d14f19223743aac Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 13 May 2021 14:13:37 +0300 Subject: [PATCH 006/125] more --- docker/builder/Dockerfile | 2 +- docker/test/base/Dockerfile | 2 +- tests/ci/ci_config.json | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile index 199b5217d79..e9ba6b2ccc1 100644 --- a/docker/builder/Dockerfile +++ b/docker/builder/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:20.04 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 44b9d42d6a1..5e41ee11ea0 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -1,7 +1,7 @@ # docker build -t yandex/clickhouse-test-base . 
FROM ubuntu:20.04 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index ee0e1a4c09d..f2c70fa1b8e 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -253,7 +253,7 @@ }, "Functional stateful tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -385,7 +385,7 @@ }, "Functional stateless tests (release, DatabaseReplicated)": { "required_build_properties": { - "compiler": "clang-11", + "compiler": "clang-12", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", From d3149ae61cd8cfdbf6d7f876db7c73e2c36df960 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 21 May 2021 17:42:04 +0300 Subject: [PATCH 007/125] more --- cmake/tools.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 8ff94ab867b..f94f4b289a3 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -79,8 +79,9 @@ endif () if (LINKER_NAME) if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0)) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LINKER_NAME}") - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LINKER_NAME}") + find_program (LLD_PATH NAMES ${LINKER_NAME}) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") else () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") From 50e233680a6d7f259dfc5670eccbb4cae1bda656 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 21 May 2021 01:04:26 +0300 Subject: [PATCH 008/125] LLVM remove non internal build --- cmake/find/llvm.cmake | 125 ++++++++++++++++--------------- cmake/find/termcap.cmake | 28 +++---- contrib/CMakeLists.txt | 2 +- utils/ci/build-normal.sh | 5 -- utils/ci/default-config | 1 - utils/ci/install-libraries.sh | 4 - utils/ci/jobs/quick-build/run.sh | 1 - 7 files changed, 78 insertions(+), 88 deletions(-) diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index c2259fc7757..88ce8927497 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -1,81 +1,82 @@ -if (APPLE OR SPLIT_SHARED_LIBRARIES OR NOT ARCH_AMD64) +if (APPLE OR SPLIT_SHARED_LIBRARIES OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") endif() option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ON) + # Broken in macos. TODO: update clang, re-test, enable on Apple -if (ENABLE_EMBEDDED_COMPILER AND NOT SPLIT_SHARED_LIBRARIES AND ARCH_AMD64 AND NOT (SANITIZE STREQUAL "undefined")) - option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library." ${NOT_UNBUNDLED}) -endif() +# if (ENABLE_EMBEDDED_COMPILER AND NOT SPLIT_SHARED_LIBRARIES AND ARCH_AMD64 AND NOT (SANITIZE STREQUAL "undefined")) +# option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library." 
${NOT_UNBUNDLED}) +# endif() if (NOT ENABLE_EMBEDDED_COMPILER) - if(USE_INTERNAL_LLVM_LIBRARY) - message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal LLVM library with ENABLE_EMBEDDED_COMPILER=OFF") - endif() +# if(USE_INTERNAL_LLVM_LIBRARY) +# message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal LLVM library with ENABLE_EMBEDDED_COMPILER=OFF") +# endif() + set (USE_EMBEDDED_COMPILER 0) return() endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt") - if (USE_INTERNAL_LLVM_LIBRARY) - message (WARNING "submodule contrib/llvm is missing. to fix try run: \n git submodule update --init --recursive") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't fidd internal LLVM library") - endif() - set (MISSING_INTERNAL_LLVM_LIBRARY 1) + # if (USE_INTERNAL_LLVM_LIBRARY) + message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init --recursive") + # message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal LLVM library") + # endif() + # set (MISSING_INTERNAL_LLVM_LIBRARY 1) endif () -if (NOT USE_INTERNAL_LLVM_LIBRARY) - set (LLVM_PATHS "/usr/local/lib/llvm" "/usr/lib/llvm") +# if (NOT USE_INTERNAL_LLVM_LIBRARY) +# set (LLVM_PATHS "/usr/local/lib/llvm" "/usr/lib/llvm") +# foreach(llvm_v 12 11.1 11) +# if (NOT LLVM_FOUND) +# find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS}) +# endif () +# endforeach () - foreach(llvm_v 12 11.1 11) - if (NOT LLVM_FOUND) - find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS}) - endif () - endforeach () +# if (LLVM_FOUND) +# # Remove dynamically-linked zlib and libedit from LLVM's dependencies: +# set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle;${ZLIB_LIBRARIES}") +# set_target_properties(LLVMLineEditor PROPERTIES INTERFACE_LINK_LIBRARIES "LLVMSupport") - if (LLVM_FOUND) - # Remove dynamically-linked zlib and libedit from LLVM's dependencies: - set_target_properties(LLVMSupport PROPERTIES INTERFACE_LINK_LIBRARIES "-lpthread;LLVMDemangle;${ZLIB_LIBRARIES}") - set_target_properties(LLVMLineEditor PROPERTIES INTERFACE_LINK_LIBRARIES "LLVMSupport") +# option(LLVM_HAS_RTTI "Enable if LLVM was build with RTTI enabled" ON) +set (USE_EMBEDDED_COMPILER 1) +# else() +# message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system LLVM") +# set (USE_EMBEDDED_COMPILER 0) +# endif() - option(LLVM_HAS_RTTI "Enable if LLVM was build with RTTI enabled" ON) - set (USE_EMBEDDED_COMPILER 1) - else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system LLVM") - set (USE_EMBEDDED_COMPILER 0) - endif() +# if (LLVM_FOUND AND OS_LINUX AND USE_LIBCXX AND NOT FORCE_LLVM_WITH_LIBCXX) +# message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is not set but the LLVM library from OS packages " +# "in Linux is incompatible with libc++ ABI. LLVM Will be disabled. Force: -DFORCE_LLVM_WITH_LIBCXX=ON") +# message (${RECONFIGURE_MESSAGE_LEVEL} "Unsupported LLVM configuration, cannot enable LLVM") +# set (LLVM_FOUND 0) +# set (USE_EMBEDDED_COMPILER 0) +# endif () +# endif() - if (LLVM_FOUND AND OS_LINUX AND USE_LIBCXX AND NOT FORCE_LLVM_WITH_LIBCXX) - message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is not set but the LLVM library from OS packages " - "in Linux is incompatible with libc++ ABI. LLVM Will be disabled. 
Force: -DFORCE_LLVM_WITH_LIBCXX=ON") - message (${RECONFIGURE_MESSAGE_LEVEL} "Unsupported LLVM configuration, cannot enable LLVM") - set (LLVM_FOUND 0) - set (USE_EMBEDDED_COMPILER 0) - endif () -endif() - -if(NOT LLVM_FOUND AND NOT MISSING_INTERNAL_LLVM_LIBRARY) - if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) - message(WARNING "Option ENABLE_EMBEDDED_COMPILER is set but internal LLVM library cannot build if build directory is the same as source directory.") - set (LLVM_FOUND 0) - set (USE_EMBEDDED_COMPILER 0) - elseif (SPLIT_SHARED_LIBRARIES) +# if(NOT LLVM_FOUND AND NOT MISSING_INTERNAL_LLVM_LIBRARY) +# if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) +# message(WARNING "Option ENABLE_EMBEDDED_COMPILER is set but internal LLVM library cannot build if build directory is the same as source directory.") +# set (LLVM_FOUND 0) +# set (USE_EMBEDDED_COMPILER 0) +# elseif (SPLIT_SHARED_LIBRARIES) # llvm-tablegen cannot find shared libraries that we build. Probably can be easily fixed. - message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is not compatible with SPLIT_SHARED_LIBRARIES. Build of LLVM will be disabled.") - set (LLVM_FOUND 0) - set (USE_EMBEDDED_COMPILER 0) - elseif (NOT ARCH_AMD64) + # message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is not compatible with SPLIT_SHARED_LIBRARIES. Build of LLVM will be disabled.") + # set (LLVM_FOUND 0) + # set (USE_EMBEDDED_COMPILER 0) + # elseif (NOT ARCH_AMD64) # It's not supported yet, but you can help. - message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is only available for x86_64. Build of LLVM will be disabled.") - set (LLVM_FOUND 0) - set (USE_EMBEDDED_COMPILER 0) - elseif (SANITIZE STREQUAL "undefined") - # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. - message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY does not work with UBSan, because 'llvm-tblgen' tool from LLVM has undefined behaviour. Build of LLVM will be disabled.") - set (LLVM_FOUND 0) - set (USE_EMBEDDED_COMPILER 0) - else () - set (USE_INTERNAL_LLVM_LIBRARY ON) + # message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY is only available for x86_64. Build of LLVM will be disabled.") + # set (LLVM_FOUND 0) + # set (USE_EMBEDDED_COMPILER 0) + # elseif (SANITIZE STREQUAL "undefined") + # # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. + # message(WARNING "Option USE_INTERNAL_LLVM_LIBRARY does not work with UBSan, because 'llvm-tblgen' tool from LLVM has undefined behaviour. Build of LLVM will be disabled.") + # set (LLVM_FOUND 0) + # set (USE_EMBEDDED_COMPILER 0) + # else () + # set (USE_INTERNAL_LLVM_LIBRARY ON) set (LLVM_FOUND 1) set (USE_EMBEDDED_COMPILER 1) set (LLVM_VERSION "9.0.0bundled") @@ -87,13 +88,13 @@ if(NOT LLVM_FOUND AND NOT MISSING_INTERNAL_LLVM_LIBRARY) endif() endif() -if (LLVM_FOUND) +# if (LLVM_FOUND) message(STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}") message(STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}") message(STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}") -else() - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable LLVM") -endif() +# else() +# message (${RECONFIGURE_MESSAGE_LEVEL} "Can't enable LLVM") +# endif() # This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles. 
set (REQUIRED_LLVM_LIBRARIES diff --git a/cmake/find/termcap.cmake b/cmake/find/termcap.cmake index 58454165785..448ef34f3c3 100644 --- a/cmake/find/termcap.cmake +++ b/cmake/find/termcap.cmake @@ -1,17 +1,17 @@ -if (ENABLE_EMBEDDED_COMPILER AND NOT USE_INTERNAL_LLVM_LIBRARY AND USE_STATIC_LIBRARIES) - find_library (TERMCAP_LIBRARY tinfo) - if (NOT TERMCAP_LIBRARY) - find_library (TERMCAP_LIBRARY ncurses) - endif() - if (NOT TERMCAP_LIBRARY) - find_library (TERMCAP_LIBRARY termcap) - endif() +# if (ENABLE_EMBEDDED_COMPILER AND NOT USE_INTERNAL_LLVM_LIBRARY AND USE_STATIC_LIBRARIES) +# find_library (TERMCAP_LIBRARY tinfo) +# if (NOT TERMCAP_LIBRARY) +# find_library (TERMCAP_LIBRARY ncurses) +# endif() +# if (NOT TERMCAP_LIBRARY) +# find_library (TERMCAP_LIBRARY termcap) +# endif() - if (NOT TERMCAP_LIBRARY) - message (FATAL_ERROR "Statically Linking external LLVM requires termcap") - endif() +# if (NOT TERMCAP_LIBRARY) +# message (FATAL_ERROR "Statically Linking external LLVM requires termcap") +# endif() - target_link_libraries(LLVMSupport INTERFACE ${TERMCAP_LIBRARY}) +# target_link_libraries(LLVMSupport INTERFACE ${TERMCAP_LIBRARY}) - message (STATUS "Using termcap: ${TERMCAP_LIBRARY}") -endif() +# message (STATUS "Using termcap: ${TERMCAP_LIBRARY}") +# endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9eafec23f51..21d26695e33 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -205,7 +205,7 @@ elseif(GTEST_SRC_DIR) target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0) endif() -if (USE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY) +if (USE_EMBEDDED_COMPILER) # ld: unknown option: --color-diagnostics if (APPLE) set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "") diff --git a/utils/ci/build-normal.sh b/utils/ci/build-normal.sh index b937269c8a3..328bd2c9f51 100755 --- a/utils/ci/build-normal.sh +++ b/utils/ci/build-normal.sh @@ -8,11 +8,6 @@ source default-config mkdir -p "${WORKSPACE}/build" pushd "${WORKSPACE}/build" -if [[ "${ENABLE_EMBEDDED_COMPILER}" == 1 ]]; then - [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -DUSE_INTERNAL_LLVM_LIBRARY=1" - [[ "$USE_LLVM_LIBRARIES_FROM_SYSTEM" != 0 ]] && CMAKE_FLAGS="$CMAKE_FLAGS -DUSE_INTERNAL_LLVM_LIBRARY=0" -fi - cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DENABLE_EMBEDDED_COMPILER=${ENABLE_EMBEDDED_COMPILER} $CMAKE_FLAGS ../sources [[ "$BUILD_TARGETS" != 'all' ]] && BUILD_TARGETS_STRING="--target $BUILD_TARGETS" diff --git a/utils/ci/default-config b/utils/ci/default-config index cd6f25ecf9b..b66121cc757 100644 --- a/utils/ci/default-config +++ b/utils/ci/default-config @@ -27,7 +27,6 @@ CLANG_SOURCES_BRANCH=trunk # or tags/RELEASE_600/final GCC_SOURCES_VERSION=latest # or gcc-7.1.0 # install-libraries -USE_LLVM_LIBRARIES_FROM_SYSTEM=0 # 0 or 1 ENABLE_EMBEDDED_COMPILER=1 # build diff --git a/utils/ci/install-libraries.sh b/utils/ci/install-libraries.sh index d7fb856dbed..7615375fbc1 100755 --- a/utils/ci/install-libraries.sh +++ b/utils/ci/install-libraries.sh @@ -5,7 +5,3 @@ source default-config ./install-os-packages.sh libicu-dev ./install-os-packages.sh libreadline-dev - -if [[ "$ENABLE_EMBEDDED_COMPILER" == 1 && "$USE_LLVM_LIBRARIES_FROM_SYSTEM" == 1 ]]; then - ./install-os-packages.sh llvm-libs-5.0 -fi diff --git a/utils/ci/jobs/quick-build/run.sh b/utils/ci/jobs/quick-build/run.sh index 3d755625c8d..af977d14465 100755 --- a/utils/ci/jobs/quick-build/run.sh +++ b/utils/ci/jobs/quick-build/run.sh @@ -15,7 +15,6 @@ SOURCES_METHOD=local COMPILER=clang 
COMPILER_INSTALL_METHOD=packages COMPILER_PACKAGE_VERSION=6.0 -USE_LLVM_LIBRARIES_FROM_SYSTEM=0 BUILD_METHOD=normal BUILD_TARGETS=clickhouse BUILD_TYPE=Debug From 07556fac2ce0b04907f2ca2c52aacdbb3bf4e73b Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 25 May 2021 12:52:53 +0300 Subject: [PATCH 009/125] try fix aarch64 --- cmake/tools.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index f94f4b289a3..0e213c285d8 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -77,6 +77,11 @@ if (OS_LINUX AND NOT LINKER_NAME) endif () endif () +if (LINKER_NAME AND NOT LLD_PATH) + find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "lld-${COMPILER_VERSION_MAJOR}" "ld.lld" "lld") + find_program (GOLD_PATH NAMES "ld.gold" "gold") +endif () + if (LINKER_NAME) if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0)) find_program (LLD_PATH NAMES ${LINKER_NAME}) From 4f711ee038f82a4d506cc3ab8176d5e17bfd3adb Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 25 May 2021 16:05:30 +0300 Subject: [PATCH 010/125] fix darwin --- cmake/tools.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 0e213c285d8..7c15332a51a 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -77,7 +77,7 @@ if (OS_LINUX AND NOT LINKER_NAME) endif () endif () -if (LINKER_NAME AND NOT LLD_PATH) +if (NOT OS_DARWIN AND LINKER_NAME AND NOT LLD_PATH) find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "lld-${COMPILER_VERSION_MAJOR}" "ld.lld" "lld") find_program (GOLD_PATH NAMES "ld.gold" "gold") endif () From 227eb9fda5a386ec4aa64fa23a203b876be322dc Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 25 May 2021 18:29:04 +0300 Subject: [PATCH 011/125] try --- cmake/freebsd/toolchain-x86_64.cmake | 2 +- cmake/linux/toolchain-aarch64.cmake | 2 +- cmake/tools.cmake | 5 ----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake index d9839ec74ee..f9e45686db7 100644 --- a/cmake/freebsd/toolchain-x86_64.cmake +++ b/cmake/freebsd/toolchain-x86_64.cmake @@ -10,7 +10,7 @@ set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it set (CMAKE_AR "/usr/bin/ar" CACHE FILEPATH "" FORCE) set (CMAKE_RANLIB "/usr/bin/ranlib" CACHE FILEPATH "" FORCE) -set (LINKER_NAME "lld" CACHE STRING "" FORCE) +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index e3924fdc537..b4dc6e45cbb 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -13,7 +13,7 @@ set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_D set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64") set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64") -set (LINKER_NAME "lld" CACHE STRING "" FORCE) +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") diff --git a/cmake/tools.cmake 
b/cmake/tools.cmake index 7c15332a51a..f94f4b289a3 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -77,11 +77,6 @@ if (OS_LINUX AND NOT LINKER_NAME) endif () endif () -if (NOT OS_DARWIN AND LINKER_NAME AND NOT LLD_PATH) - find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "lld-${COMPILER_VERSION_MAJOR}" "ld.lld" "lld") - find_program (GOLD_PATH NAMES "ld.gold" "gold") -endif () - if (LINKER_NAME) if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0)) find_program (LLD_PATH NAMES ${LINKER_NAME}) From fc212753d5d1019e08bdab633a41e003d480fe51 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Sun, 22 Aug 2021 17:38:07 +0300 Subject: [PATCH 012/125] Add Snowflake ID-date docs. --- .../functions/type-conversion-functions.md | 32 ++-- .../functions/type-conversion-functions.md | 141 ++++++++++++++++++ .../sql-reference/statements/create/table.md | 1 + 3 files changed, 155 insertions(+), 19 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ad6edaea312..df1994e86fa 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1342,7 +1342,7 @@ Result: ## snowflakeToDateTime {#snowflakeToDateTime} -Extract time from snowflake id as DateTime format. +Extracts time from Snowflake ID as [DateTime](../data-types/datetime.md) format. **Syntax** @@ -1352,12 +1352,12 @@ snowflakeToDateTime(value [, time_zone]) **Parameters** -- `value` — `snowflake id`, Int64 value. +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** -- value converted to the `DateTime` data type. +- Value converted to the [DateTime](../data-types/datetime.md) data type. **Example** @@ -1378,7 +1378,7 @@ Result: ## snowflakeToDateTime64 {#snowflakeToDateTime64} -Extract time from snowflake id as DateTime64 format. +Extracts time from Snowflake ID as [DateTime64](../data-types/datetime64.md) format. **Syntax** @@ -1388,12 +1388,12 @@ snowflakeToDateTime64(value [, time_zone]) **Parameters** -- `value` — `snowflake id`, Int64 value. +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** -- value converted to the `DateTime64` data type. +- Value converted to the [DateTime64](../data-types/datetime64.md) data type. **Example** @@ -1414,7 +1414,7 @@ Result: ## dateTimeToSnowflake {#dateTimeToSnowflake} -Convert DateTime to the first snowflake id at the giving time. +Converts [DateTime](../data-types/datetime.md) value to the first Snowflake ID at the giving time. **Syntax** @@ -1426,33 +1426,29 @@ dateTimeToSnowflake(value) - `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md). - **Returned value** -- `value` converted to the `Int64` data type as the first snowflake id at that time. 
+- `value` converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** Query: ``` sql -WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt -SELECT dateTimeToSnowflake(dt); +WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); ``` Result: ``` text - ┌─dateTimeToSnowflake(dt)─┐ │ 1426860702823350272 │ └─────────────────────────┘ ``` - ## dateTime64ToSnowflake {#dateTime64ToSnowflake} -Convert DateTime64 to the first snowflake id at the giving time. +Convert [DateTime64](../data-types/datetime64.md) to the first Snowflake ID at the giving time. **Syntax** @@ -1464,18 +1460,16 @@ dateTime64ToSnowflake(value) - `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - **Returned value** -- `value` converted to the `Int64` data type as the first snowflake id at that time. +- `value` converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** Query: ``` sql -WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 -SELECT dateTime64ToSnowflake(dt64); +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); ``` Result: @@ -1484,4 +1478,4 @@ Result: ┌─dateTime64ToSnowflake(dt64)─┐ │ 1426860704886947840 │ └─────────────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 757afca9588..0793864f526 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1338,3 +1338,144 @@ FROM numbers(3); │ 2,"good" │ └───────────────────────────────────────────┘ ``` + +## snowflakeToDateTime {#snowflakeToDateTime} + +Извлекает время из Snowflake ID в формате [DateTime](../data-types/datetime.md). + +**Синтаксис** + +``` sql +snowflakeToDateTime(value [, time_zone]) +``` + +**Аргументы** + +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [временная зона сервера](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция распознает `time_string` в соответствии с часовым поясом. Необязательный. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Значение, преобразованное в фомат [DateTime](../data-types/datetime.md). + +**Пример** + +Запрос: + +``` sql +SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); +``` + +Результат: + +``` text + +┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:57:56 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## snowflakeToDateTime64 {#snowflakeToDateTime64} + +Извлекает время из Snowflake ID в формате [DateTime64](../data-types/datetime64.md). + +**Синтаксис** + +``` sql +snowflakeToDateTime64(value [, time_zone]) +``` + +**Аргументы** + +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [временная зона сервера](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция распознает `time_string` в соответствии с часовым поясом. Необязательный. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Значение, преобразованное в фомат [DateTime64](../data-types/datetime64.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); +``` + +Результат: + +``` text + +┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:58:19.841 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +## dateTimeToSnowflake {#dateTimeToSnowflake} + +Преобразует значение [DateTime](../data-types/datetime.md) в первый идентификатор Snowflake ID на текущий момент. + +**Syntax** + +``` sql +dateTimeToSnowflake(value) +``` + +**Аргументы** + +- `value` — дата и время. [DateTime](../../sql-reference/data-types/datetime.md). + +**Возвращаемое значение** + +- Значение, преобразованное в [Int64](../data-types/int-uint.md), как первый идентификатор Snowflake ID в момент выполнения. + +**Пример** + +Запрос: + +``` sql +WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); +``` + +Результат: + +``` text +┌─dateTimeToSnowflake(dt)─┐ +│ 1426860702823350272 │ +└─────────────────────────┘ +``` + +## dateTime64ToSnowflake {#dateTime64ToSnowflake} + +Преобразует значение [DateTime64](../data-types/datetime64.md) в первый идентификатор Snowflake ID на текущий момент. + +**Синтаксис** + +``` sql +dateTime64ToSnowflake(value) +``` + +**Аргументы** + +- `value` — дата и время. [DateTime64](../data-types/datetime64.md). + +**Возвращаемое значение** + +- Значение, преобразованное в [Int64](../data-types/int-uint.md), как первый идентификатор Snowflake ID в момент выполнения. + + +**Пример** + +Запрос: + +``` sql +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); +``` + +Результат: + +``` text +┌─dateTime64ToSnowflake(dt64)─┐ +│ 1426860704886947840 │ +└─────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 073cd4fa7c1..77c192b2b26 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -247,6 +247,7 @@ CREATE TABLE codec_example ) ENGINE = MergeTree() ``` + ## Временные таблицы {#temporary-tables} ClickHouse поддерживает временные таблицы со следующими характеристиками: From 57664d4802e2bf8d09f865f9e0ac5e4cc6da2977 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Sun, 22 Aug 2021 17:48:35 +0300 Subject: [PATCH 013/125] Minor fix --- .../sql-reference/functions/type-conversion-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index df1994e86fa..32822aae488 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1357,7 +1357,7 @@ snowflakeToDateTime(value [, time_zone]) **Returned value** -- Value converted to the [DateTime](../data-types/datetime.md) data type. +- Input value converted to the [DateTime](../data-types/datetime.md) data type. **Example** @@ -1393,7 +1393,7 @@ snowflakeToDateTime64(value [, time_zone]) **Returned value** -- Value converted to the [DateTime64](../data-types/datetime64.md) data type. +- Input value converted to the [DateTime64](../data-types/datetime64.md) data type. **Example** @@ -1428,7 +1428,7 @@ dateTimeToSnowflake(value) **Returned value** -- `value` converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. 
+- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** @@ -1462,7 +1462,7 @@ dateTime64ToSnowflake(value) **Returned value** -- `value` converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** From 30f404f4b6b8dbee29c086154efe3aea95e536d4 Mon Sep 17 00:00:00 2001 From: Roman Zhukov Date: Tue, 24 Aug 2021 16:43:56 +0000 Subject: [PATCH 014/125] Update setting ZooKeeper --- .../settings.md | 14 ++++++++++++-- .../settings.md | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 66a2bde4b4b..adc7c3e0845 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1253,7 +1253,7 @@ If this section is specified, the path from [users_config](../../operations/serv The `user_directories` section can contain any number of items, the order of the items means their precedence (the higher the item the higher the precedence). -**Example** +**Examples** ``` xml @@ -1263,13 +1263,23 @@ The `user_directories` section can contain any number of items, the order of the /var/lib/clickhouse/access/ + +``` + +Users, roles, row policies, quotas and profiles can be also stored in ZooKeeper: + +``` xml + + + /etc/clickhouse-server/users.xml + /clickhouse/access/ ``` -You can also specify settings `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. +You can also define sections memory `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with a following parameters: - `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 98c5748ba41..8c6033b75ae 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1200,12 +1200,13 @@ ClickHouse использует ZooKeeper для хранения метадан Секция конфигурационного файла,которая содержит настройки: - Путь к конфигурационному файлу с предустановленными пользователями. - Путь к файлу, в котором содержатся пользователи, созданные при помощи SQL команд. +- Путь к узлу ZooKeeper, где хранятся и реплицируются пользователи, созданные с помощью команд SQL (экспериментально). Если эта секция определена, путь из [users_config](../../operations/server-configuration-parameters/settings.md#users-config) и [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) не используется. Секция `user_directories` может содержать любое количество элементов, порядок расположения элементов обозначает их приоритет (чем выше элемент, тем выше приоритет). 
-**Пример** +**Примеры** ``` xml @@ -1218,7 +1219,20 @@ ClickHouse использует ZooKeeper для хранения метадан ``` -Также вы можете указать настройку `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранения информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol). +Пользователи, роли, политики доступа к строкам, квоты и профили могут храниться в ZooKeeper: + +``` xml + + + /etc/clickhouse-server/users.xml + + + /clickhouse/access/ + + +``` + +Также вы можете секцию `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранения информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol). Чтобы добавить LDAP-сервер в качестве удаленного каталога пользователей, которые не определены локально, определите один раздел `ldap` со следующими параметрами: - `server` — имя одного из LDAP-серверов, определенных в секции `ldap_servers` конфигурациионного файла. Этот параметр явялется необязательным и может быть пустым. From 6caced9de215dbfa511ea9180dbb2527010bb39f Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:43:39 +0300 Subject: [PATCH 015/125] Update docs/en/operations/server-configuration-parameters/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index adc7c3e0845..199e287702f 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1266,7 +1266,7 @@ The `user_directories` section can contain any number of items, the order of the ``` -Users, roles, row policies, quotas and profiles can be also stored in ZooKeeper: +Users, roles, row policies, quotas, and profiles can be also stored in ZooKeeper: ``` xml From a140891ac078540db80b07c91b224d248d74ee5d Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:01 +0300 Subject: [PATCH 016/125] Update docs/en/operations/server-configuration-parameters/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 199e287702f..41cfc75fe9a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1279,7 +1279,7 @@ Users, roles, row policies, quotas, and profiles can be also stored in ZooKeeper ``` -You can also define sections memory `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. +You can also define sections `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with a following parameters: - `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. 
From e5b600eadc908b18318719ee8cb1ee9e08df71e3 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:08 +0300 Subject: [PATCH 017/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 32822aae488..ff3c5375a9a 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1376,7 +1376,7 @@ Result: └──────────────────────────────────────────────────────────────────┘ ``` -## snowflakeToDateTime64 {#snowflakeToDateTime64} +## snowflakeToDateTime64 {#snowflaketodatetime64} Extracts time from Snowflake ID as [DateTime64](../data-types/datetime64.md) format. From c5b9bbb7e139262618440ee60c030c158b31411e Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:32 +0300 Subject: [PATCH 018/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ff3c5375a9a..8766f8c4392 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1412,7 +1412,7 @@ Result: └────────────────────────────────────────────────────────────────────┘ ``` -## dateTimeToSnowflake {#dateTimeToSnowflake} +## dateTimeToSnowflake {#datetimetosnowflake} Converts [DateTime](../data-types/datetime.md) value to the first Snowflake ID at the giving time. From e3e9ac5c9c6b92e1cbb504247c39c3ea28d22df8 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:43 +0300 Subject: [PATCH 019/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 8766f8c4392..488ff127b7d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1446,7 +1446,7 @@ Result: └─────────────────────────┘ ``` -## dateTime64ToSnowflake {#dateTime64ToSnowflake} +## dateTime64ToSnowflake {#datetime64tosnowflake} Convert [DateTime64](../data-types/datetime64.md) to the first Snowflake ID at the giving time. 
From fd580de663954455bf9f97fdfdc414bbf4606082 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:50 +0300 Subject: [PATCH 020/125] Update docs/ru/operations/server-configuration-parameters/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 8c6033b75ae..0850b59cdb7 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1232,7 +1232,7 @@ ClickHouse использует ZooKeeper для хранения метадан ``` -Также вы можете секцию `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранения информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol). +Также вы можете добавить секции `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранения информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol). Чтобы добавить LDAP-сервер в качестве удаленного каталога пользователей, которые не определены локально, определите один раздел `ldap` со следующими параметрами: - `server` — имя одного из LDAP-серверов, определенных в секции `ldap_servers` конфигурациионного файла. Этот параметр явялется необязательным и может быть пустым. From a151aff2ca6cf33b4e8fe4a4e14f18a1b345503b Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:44:57 +0300 Subject: [PATCH 021/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 0793864f526..ab38222ed10 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1339,7 +1339,7 @@ FROM numbers(3); └───────────────────────────────────────────┘ ``` -## snowflakeToDateTime {#snowflakeToDateTime} +## snowflakeToDateTime {#snowflaketodatetime} Извлекает время из Snowflake ID в формате [DateTime](../data-types/datetime.md). 
From 30cfd8bcc2e0140646b8040972bf6227f1a0ab3c Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:45:05 +0300 Subject: [PATCH 022/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index ab38222ed10..ee2ac927e68 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1375,7 +1375,7 @@ SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); └──────────────────────────────────────────────────────────────────┘ ``` -## snowflakeToDateTime64 {#snowflakeToDateTime64} +## snowflakeToDateTime64 {#snowflaketodatetime64} Извлекает время из Snowflake ID в формате [DateTime64](../data-types/datetime64.md). From 73cf90a674332107940bdcc35ae6e2cd6b10c2c6 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:02 +0300 Subject: [PATCH 023/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index ee2ac927e68..1e26eb023fb 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1411,7 +1411,7 @@ SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); └────────────────────────────────────────────────────────────────────┘ ``` -## dateTimeToSnowflake {#dateTimeToSnowflake} +## dateTimeToSnowflake {#datetimetosnowflake} Преобразует значение [DateTime](../data-types/datetime.md) в первый идентификатор Snowflake ID на текущий момент. From f0573add400d3bb3d7a72b8d4b4bf8373805d4ef Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:07 +0300 Subject: [PATCH 024/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 1e26eb023fb..76fc13e09f1 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1445,7 +1445,7 @@ WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToS └─────────────────────────┘ ``` -## dateTime64ToSnowflake {#dateTime64ToSnowflake} +## dateTime64ToSnowflake {#datetime64tosnowflake} Преобразует значение [DateTime64](../data-types/datetime64.md) в первый идентификатор Snowflake ID на текущий момент. 
From 82129dc2fed77263f87bd62a6358197e9e37c326 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:14 +0300 Subject: [PATCH 025/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 488ff127b7d..52b4da796f4 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1378,7 +1378,7 @@ Result: ## snowflakeToDateTime64 {#snowflaketodatetime64} -Extracts time from Snowflake ID as [DateTime64](../data-types/datetime64.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](../data-types/datetime64.md) format. **Syntax** From 62724a1566fcf85ff4171b8b8eb7674958e9edc6 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:20 +0300 Subject: [PATCH 026/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 52b4da796f4..10c9d9f4664 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1414,7 +1414,7 @@ Result: ## dateTimeToSnowflake {#datetimetosnowflake} -Converts [DateTime](../data-types/datetime.md) value to the first Snowflake ID at the giving time. +Converts [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** From af874dd7e052d0d6dbfbef8d0150c98d947e92e3 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:26 +0300 Subject: [PATCH 027/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 10c9d9f4664..d8c40f6f551 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1448,7 +1448,7 @@ Result: ## dateTime64ToSnowflake {#datetime64tosnowflake} -Convert [DateTime64](../data-types/datetime64.md) to the first Snowflake ID at the giving time. +Convert [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. 
**Syntax** From 40cb7171a89f75ee67a387c7ecf19eb164c618aa Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:32 +0300 Subject: [PATCH 028/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 76fc13e09f1..9cb8b9875b2 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1377,7 +1377,7 @@ SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); ## snowflakeToDateTime64 {#snowflaketodatetime64} -Извлекает время из Snowflake ID в формате [DateTime64](../data-types/datetime64.md). +Извлекает время из [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) в формате [DateTime64](../data-types/datetime64.md). **Синтаксис** From feb149f0ee533fc70d276a3f78d7a2b248a8fe1f Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:39 +0300 Subject: [PATCH 029/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 9cb8b9875b2..f6bc06771e9 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1447,7 +1447,7 @@ WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToS ## dateTime64ToSnowflake {#datetime64tosnowflake} -Преобразует значение [DateTime64](../data-types/datetime64.md) в первый идентификатор Snowflake ID на текущий момент. +Преобразует значение [DateTime64](../data-types/datetime64.md) в первый идентификатор [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) на текущий момент. **Синтаксис** From a8fd2813184e1b91ee70443dc22c8d59c5c33b00 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:47:47 +0300 Subject: [PATCH 030/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index f6bc06771e9..543b80f4cc0 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1413,7 +1413,7 @@ SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); ## dateTimeToSnowflake {#datetimetosnowflake} -Преобразует значение [DateTime](../data-types/datetime.md) в первый идентификатор Snowflake ID на текущий момент. +Преобразует значение [DateTime](../data-types/datetime.md) в первый идентификатор [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) на текущий момент. 
**Syntax** From 1b4ea013475ba6f4bec9ac3dee3c35ea75eb9d87 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:48:10 +0300 Subject: [PATCH 031/125] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 543b80f4cc0..912f4ec0b1c 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1341,7 +1341,7 @@ FROM numbers(3); ## snowflakeToDateTime {#snowflaketodatetime} -Извлекает время из Snowflake ID в формате [DateTime](../data-types/datetime.md). +Извлекает время из [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) в формате [DateTime](../data-types/datetime.md). **Синтаксис** From 4ddf7a2841fe2cc8bb1118a3c689d0ad6c5922f8 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:48:16 +0300 Subject: [PATCH 032/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index d8c40f6f551..0ec144f5d69 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1342,7 +1342,7 @@ Result: ## snowflakeToDateTime {#snowflakeToDateTime} -Extracts time from Snowflake ID as [DateTime](../data-types/datetime.md) format. +Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format. **Syntax** From 3ce50c3b4ac315420d52a5c8adfe9b27ac12799b Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Wed, 25 Aug 2021 10:48:23 +0300 Subject: [PATCH 033/125] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 0ec144f5d69..22a15a4a4f8 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1340,7 +1340,7 @@ Result: └───────────────────────────────────────────┘ ``` -## snowflakeToDateTime {#snowflakeToDateTime} +## snowflakeToDateTime {#snowflaketodatetime} Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format. 
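For the extraction direction, a sketch that reuses the sample identifiers already present in these docs; the resulting timestamps depend on the epoch offset the functions assume, so treat the output as illustrative:

``` sql
SELECT
    snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')   AS dt,
    snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC') AS dt64;
-- dt is truncated to second precision (DateTime), while dt64 keeps
-- the millisecond part (DateTime64).
```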
From 2e4da05708b1f68b50fc1de3276cf319bd1972fe Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 27 Aug 2021 13:35:07 +0300 Subject: [PATCH 034/125] CHJIT custom memory manager --- src/Interpreters/JIT/CHJIT.cpp | 124 +++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index c06b4132309..04625ada9d1 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -2,6 +2,8 @@ #if USE_EMBEDDED_COMPILER +#include + #include #include #include @@ -22,7 +24,10 @@ #include #include +#include #include +#include + namespace DB { @@ -31,6 +36,8 @@ namespace ErrorCodes { extern const int CANNOT_COMPILE_CODE; extern const int LOGICAL_ERROR; + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MPROTECT; } /** Simple module to object file compiler. @@ -113,25 +120,128 @@ class JITModuleMemoryManager { public: llvm::sys::MemoryBlock allocateMappedMemory( - llvm::SectionMemoryManager::AllocationPurpose Purpose [[maybe_unused]], + llvm::SectionMemoryManager::AllocationPurpose, size_t NumBytes, - const llvm::sys::MemoryBlock * const NearBlock, + const llvm::sys::MemoryBlock * const, unsigned Flags, std::error_code & EC) override { - auto allocated_memory_block = llvm::sys::Memory::allocateMappedMemory(NumBytes, NearBlock, Flags, EC); - allocated_size += allocated_memory_block.allocatedSize(); - return allocated_memory_block; + EC = std::error_code(); + if (NumBytes == 0) + return llvm::sys::MemoryBlock(); + + int protection_flags = getPosixProtectionFlags(Flags); + +#if defined(__NetBSD__) && defined(PROT_MPROTECT) + protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + + auto page_size = getPageSize(); + auto num_pages = (NumBytes + page_size - 1) / page_size; + auto allocate_size = num_pages * page_size; + + void * buf = nullptr; + int res = posix_memalign(&buf, page_size, allocate_size); + + if (res != 0) + throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", + page_size, + ReadableSize(allocate_size)), + ErrorCodes::CANNOT_ALLOCATE_MEMORY, + res); + + auto result = llvm::sys::MemoryBlock(buf, allocate_size); + protectBlock(result, protection_flags); + allocated_size += result.allocatedSize(); + + return result; } std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & Block, unsigned Flags) override { - return llvm::sys::Memory::protectMappedMemory(Block, Flags); + int protection_flags = getPosixProtectionFlags(Flags); + bool invalidate_cache = (Flags & llvm::sys::Memory::MF_EXEC); + +#if defined(__arm__) || defined(__aarch64__) + // Certain ARM implementations treat icache clear instruction as a memory read, + // and CPU segfaults on trying to clear cache on !PROT_READ page. Therefore we need + // to temporarily add PROT_READ for the sake of flushing the instruction caches. 
+ if (invalidate_cache && !(protection_flags & PROT_READ)) { + protectBlock(Block, protection_flags | PROT_READ); + Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize); + InvalidateCache = false; + } +#endif + + protectBlock(Block, protection_flags); + + if (invalidate_cache) + llvm::sys::Memory::InvalidateInstructionCache(Block.base(), Block.allocatedSize()); + + return std::error_code(); } - std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & M) override { return llvm::sys::Memory::releaseMappedMemory(M); } + std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & M) override + { + if (M.base() == nullptr || M.allocatedSize() == 0) + return std::error_code(); + + protectBlock(M, PROT_READ | PROT_WRITE); + + free(M.base()); + allocated_size -= M.allocatedSize(); + + return std::error_code(); + } size_t allocated_size = 0; + + private: + + static void protectBlock(const llvm::sys::MemoryBlock & block, int protection_flags) + { + int res = ::mprotect(block.base(), block.allocatedSize(), protection_flags); + if (res != 0) + throwFromErrno(fmt::format("Cannot protect memory (m_protect) alignment {} size {}.", + block.base(), + block.allocatedSize()), + ErrorCodes::CANNOT_MPROTECT, + res); + } + + static int getPosixProtectionFlags(unsigned flags) + { + switch (flags & llvm::sys::Memory::MF_RWE_MASK) + { + case llvm::sys::Memory::MF_READ: + return PROT_READ; + case llvm::sys::Memory::MF_WRITE: + return PROT_WRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE: + return PROT_READ | PROT_WRITE; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC: + return PROT_READ | PROT_EXEC; + case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE | + llvm::sys::Memory::MF_EXEC: + return PROT_READ | PROT_WRITE | PROT_EXEC; + case llvm::sys::Memory::MF_EXEC: + #if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) + // On PowerPC, having an executable page that has no read permission + // can have unintended consequences. The function InvalidateInstruction- + // Cache uses instructions dcbf and icbi, both of which are treated by + // the processor as loads. If the page has no read permissions, + // executing these instructions will result in a segmentation fault. + return PROT_READ | PROT_EXEC; + #else + return PROT_EXEC; + #endif + default: + __builtin_unreachable(); + } + // Provide a default return value as required by some compilers. + return PROT_NONE; + } }; public: From 4624bf70b0cccfccfd218226e4a95a01a72e5381 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 28 Aug 2021 23:33:18 +0800 Subject: [PATCH 035/125] Always monotonic for non-zero division --- src/Functions/FunctionBinaryArithmetic.h | 34 ++- src/Storages/MergeTree/KeyCondition.cpp | 231 +++++++----------- src/Storages/MergeTree/KeyCondition.h | 8 + ...480_binary_operator_monotonicity.reference | 1 + .../01480_binary_operator_monotonicity.sql | 10 + 5 files changed, 138 insertions(+), 146 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 4907bf6abda..a30a40cc52b 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1422,16 +1422,32 @@ public: Monotonicity getMonotonicityForRange(const IDataType &, const Field & left_point, const Field & right_point) const override { - // For simplicity, we treat null values as monotonicity breakers. 
+ const std::string_view name_view = Name::name; + + // For simplicity, we treat null values as monotonicity breakers, except for variable / non-zero constant. if (left_point.isNull() || right_point.isNull()) + { + if (name_view == "divide" || name_view == "intDiv") + { + // variable / constant + if (right.column && isColumnConst(*right.column)) + { + auto constant = (*right.column)[0]; + if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0))) + return {false, true, false}; // variable / 0 is undefined, let's treat it as non-monotonic + bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant); + + // division is saturated to `inf`, thus it doesn't have overflow issues. + return {true, is_constant_positive, true}; + } + } return {false, true, false}; + } // For simplicity, we treat every single value interval as positive monotonic. if (applyVisitor(FieldVisitorAccurateEquals(), left_point, right_point)) return {true, true, false}; - const std::string_view name_view = Name::name; - if (name_view == "minus" || name_view == "plus") { // const +|- variable @@ -1503,14 +1519,14 @@ public: return {true, true, false}; // 0 / 0 is undefined, thus it's not always monotonic bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant); - if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0)) && - applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0))) + if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0)) + && applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0))) { return {true, is_constant_positive, false}; } - else - if (applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point) && - applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point)) + else if ( + applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point) + && applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point)) { return {true, !is_constant_positive, false}; } @@ -1524,7 +1540,7 @@ public: bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant); // division is saturated to `inf`, thus it doesn't have overflow issues. - return {true, is_constant_positive, false}; + return {true, is_constant_positive, true}; } } return {false, true, false}; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index b8896d535b4..6384316d07f 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -668,44 +668,34 @@ void KeyCondition::traverseAST(const ASTPtr & node, ContextPtr context, Block & rpn.emplace_back(std::move(element)); } -bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( - const ASTPtr & node, + +/** The key functional expression constraint may be inferred from a plain column in the expression. + * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, + * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` + * condition also holds, so the index may be used to select only parts satisfying this condition. + * + * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the + * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). 
+ * Instead, we can qualify only functions that do not transform the range (for example rounding), + * which while not strictly monotonic, are monotonic everywhere on the input range. + */ +bool KeyCondition::transformConstantWithValidFunctions( + const String & expr_name, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, - DataTypePtr & out_type) + DataTypePtr & out_type, + std::function always_monotonic) const { - String expr_name = node->getColumnNameWithoutAlias(); - - if (array_joined_columns.count(expr_name)) - return false; - - if (key_subexpr_names.count(expr_name) == 0) - return false; - - if (out_value.isNull()) - return false; - const auto & sample_block = key_expr->getSampleBlock(); - - /** The key functional expression constraint may be inferred from a plain column in the expression. - * For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`, - * it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())` - * condition also holds, so the index may be used to select only parts satisfying this condition. - * - * To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the - * inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`). - * Instead, we can qualify only functions that do not transform the range (for example rounding), - * which while not strictly monotonic, are monotonic everywhere on the input range. - */ - for (const auto & dag_node : key_expr->getNodes()) + for (const auto & node : key_expr->getNodes()) { - auto it = key_columns.find(dag_node.result_name); + auto it = key_columns.find(node.result_name); if (it != key_columns.end()) { std::stack chain; - const auto * cur_node = &dag_node; + const auto * cur_node = &node; bool is_valid_chain = true; while (is_valid_chain) @@ -715,21 +705,25 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( chain.push(cur_node); - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() == 1) + if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) { - const auto * next_node = cur_node->children.front(); + is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type); - if (!cur_node->function_base->hasInformationAboutMonotonicity()) - is_valid_chain = false; - else + const ActionsDAG::Node * next_node = nullptr; + for (const auto * arg : cur_node->children) { - /// Range is irrelevant in this case. - auto monotonicity = cur_node->function_base->getMonotonicityForRange( - *next_node->result_type, Field(), Field()); - if (!monotonicity.is_always_monotonic) + if (arg->column && isColumnConst(*arg->column)) + continue; + + if (next_node) is_valid_chain = false; + + next_node = arg; } + if (!next_node) + is_valid_chain = false; + cur_node = next_node; } else if (cur_node->type == ActionsDAG::ActionType::ALIAS) @@ -738,7 +732,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( is_valid_chain = false; } - if (is_valid_chain && !chain.empty()) + if (is_valid_chain) { /// Here we cast constant to the input type. /// It is not clear, why this works in general. 
@@ -761,8 +755,30 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (func->type != ActionsDAG::ActionType::FUNCTION) continue; - std::tie(const_value, const_type) = - applyFunctionForFieldOfUnknownType(func->function_base, const_type, const_value); + if (func->children.size() == 1) + { + std::tie(const_value, const_type) + = applyFunctionForFieldOfUnknownType(func->function_base, const_type, const_value); + } + else if (func->children.size() == 2) + { + const auto * left = func->children[0]; + const auto * right = func->children[1]; + if (left->column && isColumnConst(*left->column)) + { + auto left_arg_type = left->result_type; + auto left_arg_value = (*left->column)[0]; + std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( + func->function_builder, left_arg_type, left_arg_value, const_type, const_value); + } + else + { + auto right_arg_type = right->result_type; + auto right_arg_value = (*right->column)[0]; + std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( + func->function_builder, const_type, const_value, right_arg_type, right_arg_value); + } + } } out_key_column_num = it->second; @@ -773,10 +789,43 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( } } } - return false; } +bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( + const ASTPtr & node, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + Field & out_value, + DataTypePtr & out_type) +{ + String expr_name = node->getColumnNameWithoutAlias(); + + if (array_joined_columns.count(expr_name)) + return false; + + if (key_subexpr_names.count(expr_name) == 0) + return false; + + if (out_value.isNull()) + return false; + + return transformConstantWithValidFunctions( + expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType & type) + { + if (!func.hasInformationAboutMonotonicity()) + return false; + else + { + /// Range is irrelevant in this case. 
+ auto monotonicity = func.getMonotonicityForRange(type, Field(), Field()); + if (!monotonicity.is_always_monotonic) + return false; + } + return true; + }); +} + /// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` bool KeyCondition::canConstantBeWrappedByFunctions( const ASTPtr & ast, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) @@ -805,106 +854,14 @@ bool KeyCondition::canConstantBeWrappedByFunctions( return false; } - const auto & sample_block = key_expr->getSampleBlock(); - if (out_value.isNull()) return false; - for (const auto & node : key_expr->getNodes()) - { - auto it = key_columns.find(node.result_name); - if (it != key_columns.end()) + return transformConstantWithValidFunctions( + expr_name, out_key_column_num, out_key_column_type, out_value, out_type, [](IFunctionBase & func, const IDataType &) { - std::stack chain; - - const auto * cur_node = &node; - bool is_valid_chain = true; - - while (is_valid_chain) - { - if (cur_node->result_name == expr_name) - break; - - chain.push(cur_node); - - if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2) - { - if (!cur_node->function_base->isDeterministic()) - is_valid_chain = false; - - const ActionsDAG::Node * next_node = nullptr; - for (const auto * arg : cur_node->children) - { - if (arg->column && isColumnConst(*arg->column)) - continue; - - if (next_node) - is_valid_chain = false; - - next_node = arg; - } - - if (!next_node) - is_valid_chain = false; - - cur_node = next_node; - } - else if (cur_node->type == ActionsDAG::ActionType::ALIAS) - cur_node = cur_node->children.front(); - else - is_valid_chain = false; - } - - if (is_valid_chain) - { - /// This CAST is the same as in canConstantBeWrappedByMonotonicFunctions (see comment). 
- auto const_type = cur_node->result_type; - auto const_column = out_type->createColumnConst(1, out_value); - auto const_value = (*castColumn({const_column, out_type, ""}, const_type))[0]; - - while (!chain.empty()) - { - const auto * func = chain.top(); - chain.pop(); - - if (func->type != ActionsDAG::ActionType::FUNCTION) - continue; - - if (func->children.size() == 1) - { - std::tie(const_value, const_type) = applyFunctionForFieldOfUnknownType(func->function_base, const_type, const_value); - } - else if (func->children.size() == 2) - { - const auto * left = func->children[0]; - const auto * right = func->children[1]; - if (left->column && isColumnConst(*left->column)) - { - auto left_arg_type = left->result_type; - auto left_arg_value = (*left->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, left_arg_type, left_arg_value, const_type, const_value); - } - else - { - auto right_arg_type = right->result_type; - auto right_arg_value = (*right->column)[0]; - std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType( - func->function_builder, const_type, const_value, right_arg_type, right_arg_value); - } - } - } - - out_key_column_num = it->second; - out_key_column_type = sample_block.getByName(it->first).type; - out_value = const_value; - out_type = const_type; - return true; - } - } - } - - return false; + return func.isDeterministic(); + }); } bool KeyCondition::tryPrepareSetIndex( diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index edae921bfda..1f808b69b7f 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -375,6 +375,14 @@ private: DataTypePtr & out_key_column_type, std::vector & out_functions_chain); + bool transformConstantWithValidFunctions( + const String & expr_name, + size_t & out_key_column_num, + DataTypePtr & out_key_column_type, + Field & out_value, + DataTypePtr & out_type, + std::function always_monotonic) const; + bool canConstantBeWrappedByMonotonicFunctions( const ASTPtr & node, size_t & out_key_column_num, diff --git a/tests/queries/0_stateless/01480_binary_operator_monotonicity.reference b/tests/queries/0_stateless/01480_binary_operator_monotonicity.reference index 405d3348775..cd7edf71738 100644 --- a/tests/queries/0_stateless/01480_binary_operator_monotonicity.reference +++ b/tests/queries/0_stateless/01480_binary_operator_monotonicity.reference @@ -6,3 +6,4 @@ 0 0 0 +40 4 diff --git a/tests/queries/0_stateless/01480_binary_operator_monotonicity.sql b/tests/queries/0_stateless/01480_binary_operator_monotonicity.sql index 61313de4669..b49e2aa4da5 100644 --- a/tests/queries/0_stateless/01480_binary_operator_monotonicity.sql +++ b/tests/queries/0_stateless/01480_binary_operator_monotonicity.sql @@ -43,3 +43,13 @@ DROP TABLE IF EXISTS binary_op_mono5; DROP TABLE IF EXISTS binary_op_mono6; DROP TABLE IF EXISTS binary_op_mono7; DROP TABLE IF EXISTS binary_op_mono8; + +drop table if exists x; +create table x (i int, j int) engine MergeTree order by i / 10 settings index_granularity = 1; + +insert into x values (10, 1), (20, 2), (30, 3), (40, 4); + +set max_rows_to_read = 3; +select * from x where i > 30; -- converted to i / 10 >= 3, thus needs to read 3 granules. 
+ +drop table x; From c902afddde705547cfc6e89f5c385d7eb30a6108 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 5 Mar 2021 16:57:16 +0200 Subject: [PATCH 036/125] Added system.session_log table Which logs all the info about LogIn, LogOut and LogIn Failure events. Additional info that is logged: - User name - event type (LogIn, LogOut, LoginFailure) - Event date\time\time with microseconds - authentication type (same as for IDENTIFIED BY of CREATE USER statement) - array of active settings profiles upon login - array of active roles upon login - array of changed settings with corresponding values - client address and port - interface (TCP\HTTP\MySQL\PostgreSQL, etc.) - client info (name, version info) - optional LoginFailure reason text message. Added some tests to verify that events are properly saved with all necessary info via following interfaces: - TCP - HTTP - MySQL Known limitations - Not tested against named HTTP sessions, PostgreSQL and gRPC, hence those are not guaranteed to work 100% properly. --- docker/test/fasttest/run.sh | 3 + programs/local/LocalServer.cpp | 2 + programs/server/Server.cpp | 7 +- programs/server/config.xml | 8 + programs/server/users.d/session_log_test.xml | 1 + src/Access/AccessControlManager.h | 11 +- src/Access/SettingsProfilesCache.cpp | 1 - src/Access/SettingsProfilesInfo.h | 10 + src/Core/MySQL/Authentication.cpp | 3 + src/Core/MySQL/MySQLSession.h | 19 + src/Core/PostgreSQLProtocol.h | 5 +- src/Interpreters/Context.cpp | 28 +- src/Interpreters/Context.h | 15 + src/Interpreters/InterpreterSetQuery.cpp | 5 + src/Interpreters/InterpreterSystemQuery.cpp | 4 +- src/Interpreters/Session.cpp | 66 +++- src/Interpreters/Session.h | 10 +- src/Interpreters/SessionLog.cpp | 261 ++++++++++++ src/Interpreters/SessionLog.h | 74 ++++ src/Interpreters/SystemLog.cpp | 14 + src/Interpreters/SystemLog.h | 3 + src/Interpreters/ya.make | 2 + .../Formats/Impl/MySQLOutputFormat.h | 2 + src/Server/HTTPHandler.h | 3 +- src/Server/MySQLHandler.cpp | 2 +- src/TableFunctions/TableFunctionMySQL.cpp | 3 +- tests/config/install.sh | 1 + tests/config/users.d/session_log_test.xml | 30 ++ .../0_stateless/01033_quota_dcl.reference | 2 +- .../01702_system_query_log.reference | 2 +- .../01747_system_session_log_long.reference | 218 +++++++++++ .../01747_system_session_log_long.sh | 370 ++++++++++++++++++ tests/queries/skip_list.json | 4 +- 33 files changed, 1164 insertions(+), 25 deletions(-) create mode 120000 programs/server/users.d/session_log_test.xml create mode 100644 src/Core/MySQL/MySQLSession.h create mode 100644 src/Interpreters/SessionLog.cpp create mode 100644 src/Interpreters/SessionLog.h create mode 100644 tests/config/users.d/session_log_test.xml create mode 100644 tests/queries/0_stateless/01747_system_session_log_long.reference create mode 100755 tests/queries/0_stateless/01747_system_session_log_long.sh diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 00af261f6c8..108544779b4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -399,6 +399,9 @@ function run_tests # depends on Go 02013_zlib_read_after_eof + + # Accesses CH via mysql table function (which is unavailable) + 01747_system_session_log_long ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2b1b6185321..258743c7e16 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -9,6 +9,7 @@ #include #include #include +#include 
#include #include #include @@ -393,6 +394,7 @@ void LocalServer::processQueries() auto context = session.makeQueryContext(); context->makeSessionContext(); /// initial_create_query requires a session context to be set. context->setCurrentQueryId(""); + applyCmdSettings(context); /// Use the same query_id (and thread group) for all queries diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index ddbc4c4e433..09b6add62d0 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -45,16 +45,21 @@ #include #include #include +#include +#include #include +#include #include +#include #include +#include #include #include #include #include #include -#include #include +#include #include #include #include diff --git a/programs/server/config.xml b/programs/server/config.xml index 510a5e230f8..98c4416da46 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -964,6 +964,14 @@ 1000 + + + system + session_log
+
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </session_log>
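With the server configuration above, login, logout and login-failure events become queryable. A sketch of the kind of inspection query the new table is meant for; column names follow the commit description and the test below, and may differ in detail:

``` sql
-- Assumes the <session_log> section above is enabled and the table has been flushed.
SELECT user, interface, type, count() AS events
FROM system.session_log
WHERE type = 'LoginFailure'
GROUP BY user, interface, type
ORDER BY events DESC
LIMIT 10;
```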
+ + + + + none + + + + + + + + + + + + ::1 + 127.0.0.1 + + session_log_test_xml_profile + default + + + diff --git a/tests/queries/0_stateless/01033_quota_dcl.reference b/tests/queries/0_stateless/01033_quota_dcl.reference index 7478adac441..e732ea2fcd6 100644 --- a/tests/queries/0_stateless/01033_quota_dcl.reference +++ b/tests/queries/0_stateless/01033_quota_dcl.reference @@ -1 +1 @@ -CREATE QUOTA default KEYED BY user_name FOR INTERVAL 1 hour TRACKING ONLY TO default, readonly +CREATE QUOTA default KEYED BY user_name FOR INTERVAL 1 hour TRACKING ONLY TO default, readonly, session_log_test_xml_user diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index 1f329feac22..3458c2e5ed4 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -8,6 +8,7 @@ GRANT queries REVOKE queries Misc queries ACTUAL LOG CONTENT: + -- fire all kinds of queries and then check if those are present in the system.query_log\nSET log_comment=\'system.query_log logging test\'; Select SELECT \'DROP queries and also a cleanup before the test\'; Drop DROP DATABASE IF EXISTS sqllt SYNC; DROP USER IF EXISTS sqllt_user; @@ -82,5 +83,4 @@ Rename RENAME TABLE sqllt.table TO sqllt.table_new; Rename RENAME TABLE sqllt.table_new TO sqllt.table; Drop TRUNCATE TABLE sqllt.table; Drop DROP TABLE sqllt.table SYNC; - SET log_comment=\'\'; DROP queries and also a cleanup after the test diff --git a/tests/queries/0_stateless/01747_system_session_log_long.reference b/tests/queries/0_stateless/01747_system_session_log_long.reference new file mode 100644 index 00000000000..9ecf7e05421 --- /dev/null +++ b/tests/queries/0_stateless/01747_system_session_log_long.reference @@ -0,0 +1,218 @@ + +# no_password - User with profile from XML +TCP endpoint +TCP 'wrong password' case is skipped for no_password. +HTTP endpoint +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint +MySQL 'wrong password' case is skipped for no_password. + +# no_password - No profiles no roles +TCP endpoint +TCP 'wrong password' case is skipped for no_password. +HTTP endpoint +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint +MySQL 'wrong password' case is skipped for no_password. + +# no_password - Two profiles, no roles +TCP endpoint +TCP 'wrong password' case is skipped for no_password. +HTTP endpoint +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint +MySQL 'wrong password' case is skipped for no_password. + +# no_password - Two profiles and two simple roles +TCP endpoint +TCP 'wrong password' case is skipped for no_password. +HTTP endpoint +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint +MySQL 'wrong password' case is skipped for no_password. + +# plaintext_password - No profiles no roles +TCP endpoint +HTTP endpoint +MySQL endpoint + +# plaintext_password - Two profiles, no roles +TCP endpoint +HTTP endpoint +MySQL endpoint + +# plaintext_password - Two profiles and two simple roles +TCP endpoint +HTTP endpoint +MySQL endpoint + +# sha256_password - No profiles no roles +TCP endpoint +HTTP endpoint +MySQL endpoint +MySQL 'successful login' case is skipped for sha256_password. + +# sha256_password - Two profiles, no roles +TCP endpoint +HTTP endpoint +MySQL endpoint +MySQL 'successful login' case is skipped for sha256_password. 
+ +# sha256_password - Two profiles and two simple roles +TCP endpoint +HTTP endpoint +MySQL endpoint +MySQL 'successful login' case is skipped for sha256_password. + +# double_sha1_password - No profiles no roles +TCP endpoint +HTTP endpoint +MySQL endpoint + +# double_sha1_password - Two profiles, no roles +TCP endpoint +HTTP endpoint +MySQL endpoint + +# double_sha1_password - Two profiles and two simple roles +TCP endpoint +HTTP endpoint +MySQL endpoint +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles TCP 
Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginSuccess 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginSuccess 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP 
LoginSuccess 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many +invalid_session_log_test_xml_user TCP LoginFailure 1 
+invalid_session_log_test_xml_user HTTP LoginFailure 1 +invalid_session_log_test_xml_user MySQL LoginFailure many +session_log_test_xml_user TCP LoginSuccess 1 +session_log_test_xml_user TCP Logout 1 +session_log_test_xml_user HTTP LoginSuccess 1 +session_log_test_xml_user HTTP Logout 1 +session_log_test_xml_user MySQL LoginSuccess 1 +session_log_test_xml_user MySQL Logout 1 diff --git a/tests/queries/0_stateless/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh new file mode 100755 index 00000000000..16b32a08442 --- /dev/null +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -0,0 +1,370 @@ +#!/usr/bin/env bash + +################################################################################################## +# Verify that login, logout, and login failure events are properly stored in system.session_log +# when different `IDENTIFIED BY` clauses are used on user. +# +# Make sure that system.session_log entries are non-empty and provide enough info on each event. +# +# Using multiple protocols +# * native TCP protocol with CH client +# * HTTP with CURL +# * MySQL - CH server accesses itself via mysql table function, query typically fails (unrelated) +# but auth should be performed properly. +# * PostgreSQL - CH server accesses itself via postgresql table function (currently out of order). +# * gRPC - not done yet +# +# There is way to control how many time a query (e.g. via mysql table function) is retried +# and hence variable number of records in session_log. To mitigate this and simplify final query, +# each auth_type is tested for separate user. That way SELECT DISTINCT doesn't exclude log entries +# from different cases. +# +# All created users added to the ALL_USERNAMES and later cleaned up. +################################################################################################## + +# To minimize amount of error context sent on failed queries when talking to CH via MySQL protocol. +export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eu + +# Since there is no way to cleanup system.session_log table, +# make sure that we can identify log entries from this test by a random user name. +readonly BASE_USERNAME="session_log_test_user_$(cat /dev/urandom | tr -cd 'a-f0-9' | head -c 32)" +readonly TMP_QUERY_FILE=$(mktemp /tmp/tmp_query.log.XXXXXX) +declare -a ALL_USERNAMES +ALL_USERNAMES+=("${BASE_USERNAME}") + +function reportError() +{ + if [ -s "${TMP_QUERY_FILE}" ] ; + then + echo "!!!!!! ERROR ${CLICKHOUSE_CLIENT} ${*} --queries-file ${TMP_QUERY_FILE}" >&2 + echo "query:" >&2 + cat "${TMP_QUERY_FILE}" >&2 + rm -f "${TMP_QUERY_FILE}" + fi +} + +function executeQuery() +{ + ## Execute query (provided via heredoc or herestring) and print query in case of error. + trap 'rm -f ${TMP_QUERY_FILE}; trap - ERR RETURN' RETURN + # Since we want to report with current values supplied to this function call + # shellcheck disable=SC2064 + trap "reportError $*" ERR + + cat - > "${TMP_QUERY_FILE}" + ${CLICKHOUSE_CLIENT} "${@}" --queries-file "${TMP_QUERY_FILE}" +} + +function cleanup() +{ + local usernames_to_cleanup + usernames_to_cleanup="$(IFS=, ; echo "${ALL_USERNAMES[*]}")" + executeQuery < "${TMP_QUERY_FILE}" + ! 
${CLICKHOUSE_CLIENT} "${@}" --multiquery --queries-file "${TMP_QUERY_FILE}" 2>&1 | tee -a ${TMP_QUERY_FILE} +} + +function createUser() +{ + local auth_type="${1}" + local username="${2}" + local password="${3}" + + if [[ "${auth_type}" == "no_password" ]] + then + password="" + + elif [[ "${auth_type}" == "plaintext_password" ]] + then + password="${password}" + + elif [[ "${auth_type}" == "sha256_password" ]] + then + password="$(executeQuery <<< "SELECT hex(SHA256('${password}'))")" + + elif [[ "${auth_type}" == "double_sha1_password" ]] + then + password="$(executeQuery <<< "SELECT hex(SHA1(SHA1('${password}')))")" + + else + echo "Invalid auth_type: ${auth_type}" >&2 + exit 1 + fi + + export RESULTING_PASS="${password}" + if [ -n "${password}" ] + then + password="BY '${password}'" + fi + + executeQuery < 1, 'many', toString(count(*))) -- do not rely on count value since MySQL does arbitrary number of retries +FROM + system.session_log +WHERE + (user LIKE '%session_log_test_xml_user%' OR user LIKE '%${BASE_USERNAME}%') + AND + event_time_microseconds >= test_start_time +GROUP BY + user_name, interface, type +ORDER BY + user_name, interface, type; +EOF \ No newline at end of file diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 0143cc78dbe..91fca7eb5d5 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -466,7 +466,7 @@ "polygon_dicts", // they use an explicitly specified database "01658_read_file_to_stringcolumn", "01721_engine_file_truncate_on_insert", // It's ok to execute in parallel but not several instances of the same test. - "01702_system_query_log", // It's ok to execute in parallel with oter tests but not several instances of the same test. + "01702_system_query_log", // It's ok to execute in parallel but not several instances of the same test. 
"01748_dictionary_table_dot", // creates database "00950_dict_get", "01615_random_one_shard_insertion", @@ -514,5 +514,7 @@ "02001_add_default_database_to_system_users", ///create user "02002_row_level_filter_bug", ///create user "02015_system_views" + "02002_row_level_filter_bug", ///create user + "01747_system_session_log_long" // Reads from system.session_log and can't be run in parallel with any other test (since almost any other test writes to session_log) ] } From 3ca0b0c8605f1933d237c870b0038d40401331d4 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 30 Aug 2021 19:08:02 +0300 Subject: [PATCH 037/125] Fixed GCC-9 build --- src/Interpreters/SessionLog.cpp | 8 ++++---- src/Interpreters/SessionLog.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 2d2f1358656..4967cb867c8 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -202,11 +202,11 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -void SessionLog::addLoginSuccess(const UUID & session_id, std::optional session_name, const Context & context) +void SessionLog::addLoginSuccess(const UUID & session_id, std::optional session_name, const Context & login_context) { - const auto access = context.getAccess(); - const auto & settings = context.getSettingsRef(); - const auto & client_info = context.getClientInfo(); + const auto access = login_context.getAccess(); + const auto & settings = login_context.getSettingsRef(); + const auto & client_info = login_context.getClientInfo(); DB::SessionLogElement log_entry(session_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 2530809f9f9..fddabf45e4e 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -66,7 +66,7 @@ class SessionLog : public SystemLog using SystemLog::SystemLog; public: - void addLoginSuccess(const UUID & session_id, std::optional session_name, const Context & context); + void addLoginSuccess(const UUID & session_id, std::optional session_name, const Context & login_context); void addLoginFailure(const UUID & session_id, const ClientInfo & info, const String & user, const Exception & reason); void addLogOut(const UUID & session_id, const String & user, const ClientInfo & client_info); }; From 335b6f12fd06dda1411dc2c789d306cc14df4301 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 30 Aug 2021 20:55:23 +0300 Subject: [PATCH 038/125] Fix --- cmake/find/amqpcpp.cmake | 2 +- contrib/amqpcpp-cmake/CMakeLists.txt | 4 +- contrib/boringssl-cmake/CMakeLists.txt | 4 +- .../table-engines/integrations/rabbitmq.md | 8 ++- src/Storages/RabbitMQ/RabbitMQSettings.h | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 60 ++++++------------- src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 + 7 files changed, 32 insertions(+), 50 deletions(-) diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake index a4a58349508..05e5d2da751 100644 --- a/cmake/find/amqpcpp.cmake +++ b/cmake/find/amqpcpp.cmake @@ -17,7 +17,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") endif () set (USE_AMQPCPP 1) -set (AMQPCPP_LIBRARY amqp-cpp) +set (AMQPCPP_LIBRARY amqp-cpp ${OPENSSL_LIBRARIES}) set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") list (APPEND AMQPCPP_INCLUDE_DIR diff --git 
a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt index 5637db4cf41..faef7bd4a1c 100644 --- a/contrib/amqpcpp-cmake/CMakeLists.txt +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -41,6 +41,4 @@ target_compile_options (amqp-cpp ) target_include_directories (amqp-cpp SYSTEM PUBLIC "${LIBRARY_DIR}/include") - -target_link_libraries (amqp-cpp PUBLIC ssl) - +target_link_libraries(amqp-cpp PUBLIC ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 9d8c6ca6083..4502d6e9d42 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -15,12 +15,12 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") endif() if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-common -fno-exceptions -fno-rtti") if(APPLE AND CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common") if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") else() diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 5fb9ce5b151..a3ee1115c00 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -21,11 +21,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], ... ) ENGINE = RabbitMQ SETTINGS - rabbitmq_host_port = 'host:port', + rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'], rabbitmq_exchange_name = 'exchange_name', rabbitmq_format = 'data_format'[,] [rabbitmq_exchange_type = 'exchange_type',] [rabbitmq_routing_key_list = 'key1,key2,...',] + [rabbitmq_secure = 0,] [rabbitmq_row_delimiter = 'delimiter_symbol',] [rabbitmq_schema = '',] [rabbitmq_num_consumers = N,] @@ -59,6 +60,11 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` +SSL connection: + +Use either `rabbitmq_secure = 1` or `amqps` in connection address: `rabbitmq_address = 'amqps://guest:guest@localhost/vhost'`. +The default behaviour of the used library is not to check if the created TLS connection is sufficiently secure. Whether the certificate is expired, self-signed, missing or invalid: the connection is simply permitted. More strict checking of certificates can possibly be implemented in the future. + Also format settings can be added along with rabbitmq-related settings. 
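For instance, a minimal sketch of a table reading over a secure connection (the column set, exchange name, and credentials below are illustrative placeholders only):

``` sql
CREATE TABLE rabbitmq_secure (key UInt64, value String)
    ENGINE = RabbitMQ
    SETTINGS rabbitmq_address = 'amqps://guest:guest@localhost/vhost', -- or: rabbitmq_host_port = 'localhost:5671', rabbitmq_secure = 1
             rabbitmq_exchange_name = 'secure_exchange',
             rabbitmq_format = 'JSONEachRow',
             rabbitmq_num_consumers = 1;
```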
Example: diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index ff7c79b89c6..01109dda66a 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -20,6 +20,8 @@ namespace DB M(UInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(String, rabbitmq_queue_base, "", "Base for queue names to be able to reopen non-empty queues in case of failure.", 0) \ M(Bool, rabbitmq_persistent, false, "For insert query messages will be made 'persistent', durable.", 0) \ + M(Bool, rabbitmq_secure, false, "Use SSL connection", 0) \ + M(String, rabbitmq_address, "", "Address for connection", 0) \ M(UInt64, rabbitmq_skip_broken_messages, 0, "Skip at least this number of broken messages from RabbitMQ per block", 0) \ M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 419071ba642..ce7e5941b68 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -97,12 +97,16 @@ StorageRabbitMQ::StorageRabbitMQ( getContext()->getConfigRef().getString("rabbitmq.username"), getContext()->getConfigRef().getString("rabbitmq.password"))) , vhost(getContext()->getConfigRef().getString("rabbitmq.vhost", rabbitmq_settings->rabbitmq_vhost.value)) + , connection_string(rabbitmq_settings->rabbitmq_address) + , secure(rabbitmq_settings->rabbitmq_secure.value) , semaphore(0, num_consumers) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(RESCHEDULE_MS) { event_handler = std::make_shared(loop.getLoop(), log); + if (secure) + SSL_library_init(); restoreConnection(false); StorageInMemoryMetadata storage_metadata; @@ -528,10 +532,10 @@ bool StorageRabbitMQ::restoreConnection(bool reconnecting) LOG_TRACE(log, "Trying to restore connection to " + address); } - connection = std::make_unique(event_handler.get(), - AMQP::Address( - parsed_address.first, parsed_address.second, - AMQP::Login(login_password.first, login_password.second), vhost)); + auto amqp_address = connection_string.empty() ? 
AMQP::Address(parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), vhost, secure) + : AMQP::Address(connection_string); + connection = std::make_unique(event_handler.get(), amqp_address); cnt_retries = 0; while (!connection->ready() && !stream_cancelled && cnt_retries++ != RETRIES_MAX) @@ -1053,50 +1057,20 @@ void registerStorageRabbitMQ(StorageFactory & factory) { auto creator_fn = [](const StorageFactory::Arguments & args) { - ASTs & engine_args = args.engine_args; - size_t args_count = engine_args.size(); - bool has_settings = args.storage_def->settings; auto rabbitmq_settings = std::make_unique(); - if (has_settings) - rabbitmq_settings->loadFromQuery(*args.storage_def); + if (!args.storage_def->settings) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "RabbitMQ engine must have settings"); - // Check arguments and settings - #define CHECK_RABBITMQ_STORAGE_ARGUMENT(ARG_NUM, ARG_NAME) \ - /* One of the three required arguments is not specified */ \ - if (args_count < (ARG_NUM) && (ARG_NUM) <= 2 && !rabbitmq_settings->ARG_NAME.changed) \ - { \ - throw Exception("Required parameter '" #ARG_NAME "' for storage RabbitMQ not specified", \ - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ - } \ - if (args_count >= (ARG_NUM)) \ - { \ - if (rabbitmq_settings->ARG_NAME.changed) /* The same argument is given in two places */ \ - { \ - throw Exception("The argument №" #ARG_NUM " of storage RabbitMQ " \ - "and the parameter '" #ARG_NAME "' is duplicated", ErrorCodes::BAD_ARGUMENTS); \ - } \ - } + rabbitmq_settings->loadFromQuery(*args.storage_def); - CHECK_RABBITMQ_STORAGE_ARGUMENT(1, rabbitmq_host_port) - CHECK_RABBITMQ_STORAGE_ARGUMENT(2, rabbitmq_format) - CHECK_RABBITMQ_STORAGE_ARGUMENT(3, rabbitmq_exchange_name) - CHECK_RABBITMQ_STORAGE_ARGUMENT(4, rabbitmq_exchange_type) - CHECK_RABBITMQ_STORAGE_ARGUMENT(5, rabbitmq_routing_key_list) - CHECK_RABBITMQ_STORAGE_ARGUMENT(6, rabbitmq_row_delimiter) - CHECK_RABBITMQ_STORAGE_ARGUMENT(7, rabbitmq_schema) - CHECK_RABBITMQ_STORAGE_ARGUMENT(8, rabbitmq_num_consumers) - CHECK_RABBITMQ_STORAGE_ARGUMENT(9, rabbitmq_num_queues) - CHECK_RABBITMQ_STORAGE_ARGUMENT(10, rabbitmq_queue_base) - CHECK_RABBITMQ_STORAGE_ARGUMENT(11, rabbitmq_persistent) - CHECK_RABBITMQ_STORAGE_ARGUMENT(12, rabbitmq_skip_broken_messages) - CHECK_RABBITMQ_STORAGE_ARGUMENT(13, rabbitmq_max_block_size) - CHECK_RABBITMQ_STORAGE_ARGUMENT(14, rabbitmq_flush_interval_ms) - CHECK_RABBITMQ_STORAGE_ARGUMENT(15, rabbitmq_vhost) - CHECK_RABBITMQ_STORAGE_ARGUMENT(16, rabbitmq_queue_settings_list) - CHECK_RABBITMQ_STORAGE_ARGUMENT(17, rabbitmq_queue_consume) + if (!rabbitmq_settings->rabbitmq_host_port.changed + && !rabbitmq_settings->rabbitmq_address.changed) + throw Exception("You must speicify either `rabbitmq_host_port` or `rabbitmq_address` settings", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - #undef CHECK_RABBITMQ_STORAGE_ARGUMENT + if (!rabbitmq_settings->rabbitmq_format.changed) + throw Exception("You must speicify `rabbitmq_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return StorageRabbitMQ::create(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings)); }; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 1a2445f3690..48a907cab2c 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -107,6 +107,8 @@ private: std::pair parsed_address; std::pair login_password; String vhost; + String 
connection_string; + bool secure; UVLoop loop; std::shared_ptr event_handler; From c1482ee45b448901f803cf3b558877747c469189 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 30 Aug 2021 22:42:35 +0300 Subject: [PATCH 039/125] CHJIT added PageArena --- src/Interpreters/JIT/CHJIT.cpp | 291 +++++++++++++++++++++------------ 1 file changed, 188 insertions(+), 103 deletions(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 04625ada9d1..33c52548a70 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -116,29 +116,122 @@ private: */ class JITModuleMemoryManager { - class DefaultMMapper final : public llvm::SectionMemoryManager::MemoryMapper + + class PageBlock { public: - llvm::sys::MemoryBlock allocateMappedMemory( - llvm::SectionMemoryManager::AllocationPurpose, - size_t NumBytes, - const llvm::sys::MemoryBlock * const, - unsigned Flags, - std::error_code & EC) override + PageBlock(void * pages_base_, size_t pages_size_, size_t page_size_) + : pages_base(pages_base_) + , pages_size(pages_size_) + , page_size(page_size_) + {} + + inline void * base() const { return pages_base; } + inline size_t pagesSize() const { return pages_size; } + inline size_t pageSize() const { return page_size; } + inline size_t blockSize() const { return pages_size * page_size; } + + private: + void * pages_base; + size_t pages_size; + size_t page_size; + }; + + class PageArena + { + public: + + PageArena() + : page_size(::getPageSize()) { - EC = std::error_code(); - if (NumBytes == 0) - return llvm::sys::MemoryBlock(); + allocateNextPageBlock(); + } - int protection_flags = getPosixProtectionFlags(Flags); + char * allocate(size_t size, size_t alignment) + { + for (size_t i = 0; i < page_blocks.size(); ++i) + { + char * result = allocateFromPageBlocks(size, alignment, i); + if (result) + return result; + } -#if defined(__NetBSD__) && defined(PROT_MPROTECT) - protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); -#endif + while (true) + { + allocateNextPageBlock(); + size_t allocated_page_index = page_blocks.size() - 1; + char * result = allocateFromPageBlocks(size, alignment, allocated_page_index); + if (result) + return result; + } + } - auto page_size = getPageSize(); - auto num_pages = (NumBytes + page_size - 1) / page_size; - auto allocate_size = num_pages * page_size; + inline size_t getAllocatedSize() const + { + return allocated_size; + } + + inline size_t getPageSize() const + { + return page_size; + } + + ~PageArena() + { + for (auto & page_block : page_blocks) + free(page_block.base()); + } + + const std::vector & getPageBlocks() const + { + return page_blocks; + } + + private: + + std::vector page_blocks; + + std::vector page_blocks_allocated_size; + + size_t page_size = 0; + + size_t allocated_size = 0; + + char * allocateFromPageBlocks(size_t size, size_t alignment, size_t page_blocks_index) + { + assert(page_blocks_index < page_blocks.size()); + auto & pages_block = page_blocks[page_blocks_index]; + + size_t block_size = pages_block.blockSize(); + size_t & block_allocated_size = page_blocks_allocated_size[page_blocks_index]; + size_t block_free_size = block_size - block_allocated_size; + + uint8_t * pages_start = static_cast(pages_block.base()); + void * pages_offset = pages_start + block_allocated_size; + + auto * result = std::align( + alignment, + size, + pages_offset, + block_free_size); + + if (result) + { + block_allocated_size = reinterpret_cast(result) - pages_start; + return static_cast(result); + } + else + { 
+ return nullptr; + } + } + + void allocateNextPageBlock() + { + size_t pages_to_allocate_size = (page_blocks.size() * 2) + 1; + size_t allocate_size = page_size * pages_to_allocate_size; + + llvm::errs() << "PageArena::allocatoeNextPageBlock page size " << page_size << " pages_to_allocate_size " << pages_to_allocate_size << "\n"; void * buf = nullptr; int res = posix_memalign(&buf, page_size, allocate_size); @@ -150,110 +243,102 @@ class JITModuleMemoryManager ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); - auto result = llvm::sys::MemoryBlock(buf, allocate_size); - protectBlock(result, protection_flags); - allocated_size += result.allocatedSize(); + page_blocks.emplace_back(buf, pages_to_allocate_size, page_size); + page_blocks_allocated_size.emplace_back(0); - return result; + allocated_size += allocate_size; + } + }; + + class MemoryManager : public llvm::RTDyldMemoryManager + { + public: + uint8_t * allocateCodeSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef) override + { + return reinterpret_cast(ex_page_arena.allocate(size, alignment)); } - std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & Block, unsigned Flags) override + uint8_t * allocateDataSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef, bool is_read_only) override { - int protection_flags = getPosixProtectionFlags(Flags); - bool invalidate_cache = (Flags & llvm::sys::Memory::MF_EXEC); + if (is_read_only) + return reinterpret_cast(ro_page_arena.allocate(size, alignment)); + else + return reinterpret_cast(rw_page_arena.allocate(size, alignment)); + } -#if defined(__arm__) || defined(__aarch64__) - // Certain ARM implementations treat icache clear instruction as a memory read, - // and CPU segfaults on trying to clear cache on !PROT_READ page. Therefore we need - // to temporarily add PROT_READ for the sake of flushing the instruction caches. - if (invalidate_cache && !(protection_flags & PROT_READ)) { - protectBlock(Block, protection_flags | PROT_READ); - Memory::InvalidateInstructionCache(M.Address, M.AllocatedSize); - InvalidateCache = false; - } + bool finalizeMemory(std::string *) override + { + protectPages(ro_page_arena, PROT_READ); + protectPages(ex_page_arena, PROT_READ | PROT_EXEC); + return true; + } + + ~MemoryManager() override + { + protectPages(ro_page_arena, PROT_READ | PROT_WRITE); + protectPages(ex_page_arena, PROT_READ | PROT_WRITE); + } + + inline size_t allocatedSize() const + { + size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); + size_t code_size = ex_page_arena.getAllocatedSize(); + + return data_size + code_size; + } + private: + PageArena rw_page_arena; + PageArena ro_page_arena; + PageArena ex_page_arena; + + static void protectPages(PageArena & arena, int protection_flags) + { + /** The code is partially based on the LLVM codebase + * The LLVM Project is under the Apache License v2.0 with LLVM Exceptions. 
+ */ + const auto & blocks = arena.getPageBlocks(); + +#if defined(__NetBSD__) && defined(PROT_MPROTECT) + protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); #endif - protectBlock(Block, protection_flags); + bool invalidate_cache = (protection_flags & PROT_EXEC); - if (invalidate_cache) - llvm::sys::Memory::InvalidateInstructionCache(Block.base(), Block.allocatedSize()); - - return std::error_code(); - } - - std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & M) override - { - if (M.base() == nullptr || M.allocatedSize() == 0) - return std::error_code(); - - protectBlock(M, PROT_READ | PROT_WRITE); - - free(M.base()); - allocated_size -= M.allocatedSize(); - - return std::error_code(); - } - - size_t allocated_size = 0; - - private: - - static void protectBlock(const llvm::sys::MemoryBlock & block, int protection_flags) - { - int res = ::mprotect(block.base(), block.allocatedSize(), protection_flags); - if (res != 0) - throwFromErrno(fmt::format("Cannot protect memory (m_protect) alignment {} size {}.", - block.base(), - block.allocatedSize()), - ErrorCodes::CANNOT_MPROTECT, - res); - } - - static int getPosixProtectionFlags(unsigned flags) - { - switch (flags & llvm::sys::Memory::MF_RWE_MASK) + for (const auto & block : blocks) { - case llvm::sys::Memory::MF_READ: - return PROT_READ; - case llvm::sys::Memory::MF_WRITE: - return PROT_WRITE; - case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE: - return PROT_READ | PROT_WRITE; - case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_EXEC: - return PROT_READ | PROT_EXEC; - case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE | - llvm::sys::Memory::MF_EXEC: - return PROT_READ | PROT_WRITE | PROT_EXEC; - case llvm::sys::Memory::MF_EXEC: - #if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) - // On PowerPC, having an executable page that has no read permission - // can have unintended consequences. The function InvalidateInstruction- - // Cache uses instructions dcbf and icbi, both of which are treated by - // the processor as loads. If the page has no read permissions, - // executing these instructions will result in a segmentation fault. - return PROT_READ | PROT_EXEC; - #else - return PROT_EXEC; - #endif - default: - __builtin_unreachable(); +#if defined(__arm__) || defined(__aarch64__) + /// Certain ARM implementations treat icache clear instruction as a memory read, + /// and CPU segfaults on trying to clear cache on !PROT_READ page. + /// Therefore we need to temporarily add PROT_READ for the sake of flushing the instruction caches. + if (invalidate_cache && !(protection_flags & PROT_READ)) + { + int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); + InvalidateCache = false; + } +#endif + int res = mprotect(block.base(), block.blockSize(), protection_flags); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + if (invalidate_cache) + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); } - // Provide a default return value as required by some compilers. 
- return PROT_NONE; } }; public: - JITModuleMemoryManager() : manager(&mmaper) { } + JITModuleMemoryManager() = default; - inline size_t getAllocatedSize() const { return mmaper.allocated_size; } + inline size_t getAllocatedSize() const { return manager.allocatedSize(); } - inline llvm::SectionMemoryManager & getManager() { return manager; } + inline llvm::RTDyldMemoryManager & getManager() { return manager; } private: - DefaultMMapper mmaper; - llvm::SectionMemoryManager manager; + MemoryManager manager; }; class JITSymbolResolver : public llvm::LegacyJITSymbolResolver From 061a4c4a275c0782acd29ec9c0ed88a72e907ff1 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Tue, 31 Aug 2021 14:55:46 +0300 Subject: [PATCH 040/125] Fix PR comment. --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 0850b59cdb7..b6806ae6598 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1200,7 +1200,7 @@ ClickHouse использует ZooKeeper для хранения метадан Секция конфигурационного файла,которая содержит настройки: - Путь к конфигурационному файлу с предустановленными пользователями. - Путь к файлу, в котором содержатся пользователи, созданные при помощи SQL команд. -- Путь к узлу ZooKeeper, где хранятся и реплицируются пользователи, созданные с помощью команд SQL (экспериментально). +- Путь к узлу ZooKeeper, где хранятся и реплицируются пользователи, созданные с помощью команд SQL (экспериментальная функциональность). Если эта секция определена, путь из [users_config](../../operations/server-configuration-parameters/settings.md#users-config) и [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) не используется. From 109d2f63d03ee974948151f4253886c2d66f8d0b Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Mon, 30 Aug 2021 20:37:07 +0300 Subject: [PATCH 041/125] Fixed tests and minor style issues --- programs/local/LocalServer.cpp | 3 +-- programs/server/Server.cpp | 4 ---- src/Core/MySQL/Authentication.cpp | 1 - src/Core/PostgreSQLProtocol.h | 1 - src/Interpreters/Session.cpp | 2 +- src/Interpreters/SystemLog.cpp | 7 +------ tests/queries/skip_list.json | 3 +-- 7 files changed, 4 insertions(+), 17 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 258743c7e16..278101e2c1d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -394,7 +393,7 @@ void LocalServer::processQueries() auto context = session.makeQueryContext(); context->makeSessionContext(); /// initial_create_query requires a session context to be set. 
context->setCurrentQueryId(""); - + applyCmdSettings(context); /// Use the same query_id (and thread group) for all queries diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 09b6add62d0..bf4e2f947dc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -54,10 +54,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index 76fb6bad833..0eb080892c1 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 6fc69d2d5b2..f0de4bbb843 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index b1f7f4349f4..d8480f3858e 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -309,7 +309,7 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So { user_id = global_context->getAccessControlManager().login(credentials_, address.host()); } - catch(const Exception & e) + catch (const Exception & e) { if (auto session_log = getSessionLog()) session_log->addLoginFailure(session_id, *prepared_client_info, credentials_.getUserName(), e); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index dfc16dae49c..2ccb84e1ffa 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -6,14 +6,9 @@ #include #include #include -#include +#include #include #include -#include -#include -#include -#include -#include #include #include diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 91fca7eb5d5..6442ac5ed12 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -513,8 +513,7 @@ "01530_drop_database_atomic_sync", /// creates database "02001_add_default_database_to_system_users", ///create user "02002_row_level_filter_bug", ///create user - "02015_system_views" - "02002_row_level_filter_bug", ///create user + "02015_system_views", "01747_system_session_log_long" // Reads from system.session_log and can't be run in parallel with any other test (since almost any other test writes to session_log) ] } From 185d29439b548c73c7939e0da770fd7452e2212a Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 31 Aug 2021 20:16:51 +0300 Subject: [PATCH 042/125] Fix --- src/Storages/StorageTableFunction.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 75480db7ef3..d44133a1a89 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -140,6 +140,13 @@ public: bool isView() const override { return false; } void checkTableCanBeDropped() const override {} + StoragePolicyPtr getStoragePolicy() const override + { + if (nested) + return StorageProxy::getStoragePolicy(); + return IStorage::getStoragePolicy(); + } + private: mutable std::mutex nested_mutex; mutable GetNestedStorageFunc get_nested; From 437c87fff3b2f8094ad29e5a27136ea735d1d494 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 1 Sep 2021 00:06:10 +0300 Subject: [PATCH 043/125] Update StorageTableFunction.h --- 
src/Storages/StorageTableFunction.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index d44133a1a89..4f9ca5d5858 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -142,6 +142,7 @@ public: StoragePolicyPtr getStoragePolicy() const override { + std::lock_guard lock{nested_mutex}; if (nested) return StorageProxy::getStoragePolicy(); return IStorage::getStoragePolicy(); From bf9abc2888cd08ff956d1d2449669176486bee7c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 1 Sep 2021 10:46:41 +0300 Subject: [PATCH 044/125] Update StorageTableFunction.h --- src/Storages/StorageTableFunction.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 4f9ca5d5858..d44133a1a89 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -142,7 +142,6 @@ public: StoragePolicyPtr getStoragePolicy() const override { - std::lock_guard lock{nested_mutex}; if (nested) return StorageProxy::getStoragePolicy(); return IStorage::getStoragePolicy(); From 09c9dd489b65c18c99f45fc53268329cbc12e0a3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 1 Sep 2021 13:28:34 +0300 Subject: [PATCH 045/125] Update StorageRabbitMQ.cpp --- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ce7e5941b68..2b37ca0763d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1066,11 +1066,11 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_host_port.changed && !rabbitmq_settings->rabbitmq_address.changed) - throw Exception("You must speicify either `rabbitmq_host_port` or `rabbitmq_address` settings", + throw Exception("You must specify either `rabbitmq_host_port` or `rabbitmq_address` settings", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!rabbitmq_settings->rabbitmq_format.changed) - throw Exception("You must speicify `rabbitmq_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception("You must specify `rabbitmq_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return StorageRabbitMQ::create(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings)); }; From 4c613f30b327c4093f7c70905a6cd0e3ff9a0f56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Sep 2021 14:40:54 +0300 Subject: [PATCH 046/125] Simplier sessions expiration in Keeper --- src/Coordination/KeeperStorage.cpp | 4 +- src/Coordination/KeeperStorage.h | 4 +- src/Coordination/SessionExpiryQueue.cpp | 65 ++++++++++++------- src/Coordination/SessionExpiryQueue.h | 29 +++++++-- ...t_for_build.cpp => gtest_coordination.cpp} | 17 +++++ 5 files changed, 86 insertions(+), 33 deletions(-) rename src/Coordination/tests/{gtest_for_build.cpp => gtest_coordination.cpp} (98%) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 3053ce17ad1..207ab25ddd2 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1078,7 +1078,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina zxid = *new_last_zxid; } - session_expiry_queue.update(session_id, 
session_and_timeout[session_id]); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { @@ -1115,6 +1114,9 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina } else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { + /// Update session only for heartbeats + session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); + KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); auto [response, _] = storage_request->process(*this, zxid, session_id); response->xid = zk_request->xid; diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 1e925a0634e..041dab05156 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -123,7 +123,7 @@ public: { auto result = session_id_counter++; session_and_timeout.emplace(result, session_timeout_ms); - session_expiry_queue.update(result, session_timeout_ms); + session_expiry_queue.addNewSessionOrUpdate(result, session_timeout_ms); return result; } @@ -131,7 +131,7 @@ public: void addSessionID(int64_t session_id, int64_t session_timeout_ms) { session_and_timeout.emplace(session_id, session_timeout_ms); - session_expiry_queue.update(session_id, session_timeout_ms); + session_expiry_queue.addNewSessionOrUpdate(session_id, session_timeout_ms); } /// Process user request and return response. diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp index 51837087af5..e19a92c29d7 100644 --- a/src/Coordination/SessionExpiryQueue.cpp +++ b/src/Coordination/SessionExpiryQueue.cpp @@ -1,5 +1,6 @@ #include #include + namespace DB { @@ -12,66 +13,80 @@ bool SessionExpiryQueue::remove(int64_t session_id) if (set_it != expiry_to_sessions.end()) set_it->second.erase(session_id); + /// No more sessions in this bucket + if (set_it->second.empty()) + expiry_to_sessions.erase(set_it); + + session_to_timeout.erase(session_it); + return true; } return false; } -bool SessionExpiryQueue::update(int64_t session_id, int64_t timeout_ms) +void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms) { - auto session_it = session_to_timeout.find(session_id); int64_t now = getNowMilliseconds(); + /// round up to next interval int64_t new_expiry_time = roundToNextInterval(now + timeout_ms); + auto session_it = session_to_timeout.find(session_id); + /// We already registered this session if (session_it != session_to_timeout.end()) { - if (new_expiry_time == session_it->second) - return false; + int64_t prev_expiry_time = session_it->second; + session_it->second = new_expiry_time; + /// Nothing changed, session stay in the some bucket + if (new_expiry_time == prev_expiry_time) + return; + /// This bucket doesn't exist, let's create it auto set_it = expiry_to_sessions.find(new_expiry_time); if (set_it == expiry_to_sessions.end()) std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set()); + /// Add session to the next bucket set_it->second.insert(session_id); - int64_t prev_expiry_time = session_it->second; - if (prev_expiry_time != new_expiry_time) - { - auto prev_set_it = expiry_to_sessions.find(prev_expiry_time); - if (prev_set_it != expiry_to_sessions.end()) - prev_set_it->second.erase(session_id); - } - session_it->second = new_expiry_time; - return true; + auto prev_set_it = expiry_to_sessions.find(prev_expiry_time); + 
/// Remove session from previous bucket + if (prev_set_it != expiry_to_sessions.end()) + prev_set_it->second.erase(session_id); + + /// No more sessions in this bucket + if (prev_set_it->second.empty()) + expiry_to_sessions.erase(prev_set_it); } else { + /// Just add sessions to the new bucket session_to_timeout[session_id] = new_expiry_time; + auto set_it = expiry_to_sessions.find(new_expiry_time); if (set_it == expiry_to_sessions.end()) std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set()); + set_it->second.insert(session_id); - return false; } } -std::unordered_set SessionExpiryQueue::getExpiredSessions() +std::unordered_set SessionExpiryQueue::getExpiredSessions() const { int64_t now = getNowMilliseconds(); - if (now < next_expiration_time) - return {}; + std::unordered_set result; - auto set_it = expiry_to_sessions.find(next_expiration_time); - int64_t new_expiration_time = next_expiration_time + expiration_interval; - next_expiration_time = new_expiration_time; - if (set_it != expiry_to_sessions.end()) + /// Check all buckets + for (auto it = expiry_to_sessions.begin(); it != expiry_to_sessions.end(); ++it) { - auto result = set_it->second; - expiry_to_sessions.erase(set_it); - return result; + int64_t expire_time_for_sessions = it->first; + if (expire_time_for_sessions <= now) + result.insert(it->second.begin(), it->second.end()); + else + break; } - return {}; + + return result; } void SessionExpiryQueue::clear() diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h index dff629a2432..8270c8fc6c8 100644 --- a/src/Coordination/SessionExpiryQueue.h +++ b/src/Coordination/SessionExpiryQueue.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -6,14 +7,25 @@ namespace DB { +/// Simple class for checking expired sessions. Main idea -- to round sessions +/// timeouts and place all sessions into buckets rounded by their expired time. +/// So we will have not too many different buckets and can check expired +/// sessions quite fast. +/// So buckets looks like this: +/// [1630580418000] -> {1, 5, 6} +/// [1630580418500] -> {2, 3} +/// ... +/// When new session appear it's added to the existing bucket or create new bucket. class SessionExpiryQueue { private: + /// Session -> timeout ms std::unordered_map session_to_timeout; - std::unordered_map> expiry_to_sessions; + + /// Expire time -> session expire near this time + std::map> expiry_to_sessions; int64_t expiration_interval; - int64_t next_expiration_time; static int64_t getNowMilliseconds() { @@ -21,23 +33,30 @@ private: return duration_cast(system_clock::now().time_since_epoch()).count(); } + /// Round time to the next expiration interval. The result used as a key for + /// expiry_to_sessions map. int64_t roundToNextInterval(int64_t time) const { return (time / expiration_interval + 1) * expiration_interval; } public: + /// expiration_interval -- how often we will check new sessions and how small + /// buckets we will have. In ZooKeeper normal session timeout is around 30 seconds + /// and expiration_interval is about 500ms. 
explicit SessionExpiryQueue(int64_t expiration_interval_) : expiration_interval(expiration_interval_) - , next_expiration_time(roundToNextInterval(getNowMilliseconds())) { } + /// Session was actually removed bool remove(int64_t session_id); - bool update(int64_t session_id, int64_t timeout_ms); + /// Update session expiry time (must be called on hearbeats) + void addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms); - std::unordered_set getExpiredSessions(); + /// Get all expired sessions + std::unordered_set getExpiredSessions() const; void clear(); }; diff --git a/src/Coordination/tests/gtest_for_build.cpp b/src/Coordination/tests/gtest_coordination.cpp similarity index 98% rename from src/Coordination/tests/gtest_for_build.cpp rename to src/Coordination/tests/gtest_coordination.cpp index 47eadbf9720..2c1cddd124b 100644 --- a/src/Coordination/tests/gtest_for_build.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1374,6 +1374,23 @@ TEST(CoordinationTest, TestRotateIntervalChanges) EXPECT_TRUE(fs::exists("./logs/changelog_141_145.bin")); } +TEST(CoordinationTest, TestSessionExpiryQueue) +{ + using namespace Coordination; + SessionExpiryQueue queue(500); + + queue.addNewSessionOrUpdate(1, 1000); + + for (size_t i = 0; i < 2; ++i) + { + EXPECT_EQ(queue.getExpiredSessions(), std::unordered_set({})); + std::this_thread::sleep_for(std::chrono::milliseconds(400)); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(700)); + EXPECT_EQ(queue.getExpiredSessions(), std::unordered_set({1})); +} + int main(int argc, char ** argv) { From c5470864e86adbd3b5c2fc6db1dc884e87a6f170 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Sep 2021 14:43:34 +0300 Subject: [PATCH 047/125] Fixup --- src/Coordination/KeeperStorage.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 207ab25ddd2..7ad7fa6c779 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1078,6 +1078,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina zxid = *new_last_zxid; } + session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special { @@ -1114,9 +1115,6 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina } else if (zk_request->getOpNum() == Coordination::OpNum::Heartbeat) /// Heartbeat request is also special { - /// Update session only for heartbeats - session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); - KeeperStorageRequestProcessorPtr storage_request = KeeperStorageRequestProcessorsFactory::instance().get(zk_request); auto [response, _] = storage_request->process(*this, zxid, session_id); response->xid = zk_request->xid; From abda2a636ebdd5ece5e16aefd86d3e5b3e9cfcd5 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Sep 2021 14:53:20 +0300 Subject: [PATCH 048/125] Added ExecutablePool storage --- src/DataStreams/ShellCommandSource.h | 181 ++++++++++++++++- .../ExecutableDictionarySource.cpp | 4 +- src/Dictionaries/ExecutableDictionarySource.h | 7 +- .../ExecutablePoolDictionarySource.cpp | 188 ++++-------------- .../ExecutablePoolDictionarySource.h | 13 +- src/Storages/ExecutablePoolSettings.cpp | 42 ++++ src/Storages/ExecutablePoolSettings.h | 24 +++ src/Storages/StorageExecutable.cpp | 107 +++++++++- src/Storages/StorageExecutable.h | 29 ++- 
.../test_executable_table_function/test.py | 12 ++ .../user_scripts/test_input_process_pool.sh | 3 + .../test_input_process_pool_multiple_pipes.sh | 10 + 12 files changed, 435 insertions(+), 185 deletions(-) create mode 100644 src/Storages/ExecutablePoolSettings.cpp create mode 100644 src/Storages/ExecutablePoolSettings.h create mode 100755 tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh create mode 100755 tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh diff --git a/src/DataStreams/ShellCommandSource.h b/src/DataStreams/ShellCommandSource.h index 4b8e3036f38..1559a96b456 100644 --- a/src/DataStreams/ShellCommandSource.h +++ b/src/DataStreams/ShellCommandSource.h @@ -3,8 +3,11 @@ #include #include +#include + #include #include + #include #include #include @@ -17,8 +20,9 @@ namespace DB { -/** A stream, that runs child process and sends data to its stdin in background thread, - * and receives data from its stdout. +/** A stream, that get child process and sends data tasks. + * For each send data task background thread is created, send data tasks must send data to process input pipes. + * ShellCommandSource receives data from process stdout. */ class ShellCommandSource final : public SourceWithProgress { @@ -29,7 +33,7 @@ public: ContextPtr context, const std::string & format, const Block & sample_block, - std::unique_ptr command_, + std::unique_ptr && command_, Poco::Logger * log_, std::vector && send_data_tasks, size_t max_block_size = DEFAULT_BLOCK_SIZE) @@ -48,7 +52,7 @@ public: ContextPtr context, const std::string & format, const Block & sample_block, - std::unique_ptr command_, + std::unique_ptr && command_, Poco::Logger * log_, size_t max_block_size = DEFAULT_BLOCK_SIZE) : SourceWithProgress(sample_block) @@ -107,4 +111,173 @@ private: Poco::Logger * log; }; +/** A stream, that get child process and sends data tasks. + * For each send data task background thread is created, send data tasks must send data to process input pipes. + * ShellCommandPoolSource receives data from process stdout. + * + * Main difference with ShellCommandSource is that ShellCommandPoolSource initialized with process_pool and rows_to_read. + * Rows to read are necessary because processes in pool are not destroyed and work in read write loop. + * Source need to finish generating new chunks after rows_to_read rows are generated from process. + * + * If rows_to_read are not specified it is expected that script will output rows_to_read before other data. + * + * After source is destroyed process is returned to pool. + */ + +using ProcessPool = BorrowedObjectPool>; + +class ShellCommandPoolSource final : public SourceWithProgress +{ +public: + using SendDataTask = std::function; + + ShellCommandPoolSource( + ContextPtr context, + const std::string & format, + const Block & sample_block, + std::shared_ptr process_pool_, + std::unique_ptr && command_, + size_t rows_to_read_, + Poco::Logger * log_, + std::vector && send_data_tasks) + : SourceWithProgress(sample_block) + , process_pool(process_pool_) + , command(std::move(command_)) + , rows_to_read(rows_to_read_) + , log(log_) + { + for (auto && send_data_task : send_data_tasks) + { + send_data_threads.emplace_back([task = std::move(send_data_task), this]() + { + try + { + task(); + } + catch (...) 
+ { + std::lock_guard lock(send_data_lock); + exception_during_send_data = std::current_exception(); + } + }); + } + + pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read))); + executor = std::make_unique(pipeline); + } + + ShellCommandPoolSource( + ContextPtr context, + const std::string & format, + const Block & sample_block, + std::shared_ptr process_pool_, + std::unique_ptr && command_, + Poco::Logger * log_, + std::vector && send_data_tasks) + : SourceWithProgress(sample_block) + , process_pool(process_pool_) + , command(std::move(command_)) + , log(log_) + { + for (auto && send_data_task : send_data_tasks) + { + send_data_threads.emplace_back([task = std::move(send_data_task), this]() + { + try + { + task(); + } + catch (...) + { + std::lock_guard lock(send_data_lock); + exception_during_send_data = std::current_exception(); + } + }); + } + + readText(rows_to_read, command->out); + pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read))); + executor = std::make_unique(pipeline); + } + + + ~ShellCommandPoolSource() override + { + for (auto & thread : send_data_threads) + if (thread.joinable()) + thread.join(); + + if (command) + process_pool->returnObject(std::move(command)); + } + +protected: + Chunk generate() override + { + rethrowExceptionDuringReadIfNeeded(); + + if (current_read_rows == rows_to_read) + return {}; + + Chunk chunk; + + try + { + if (!executor->pull(chunk)) + return {}; + + current_read_rows += chunk.getNumRows(); + } + catch (...) + { + tryLogCurrentException(log); + command = nullptr; + throw; + } + + return chunk; + } + +public: + Status prepare() override + { + auto status = SourceWithProgress::prepare(); + + if (status == Status::Finished) + { + for (auto & thread : send_data_threads) + if (thread.joinable()) + thread.join(); + + rethrowExceptionDuringReadIfNeeded(); + } + + return status; + } + + void rethrowExceptionDuringReadIfNeeded() + { + std::lock_guard lock(send_data_lock); + if (exception_during_send_data) + { + command = nullptr; + std::rethrow_exception(exception_during_send_data); + } + } + + String getName() const override { return "ShellCommandPoolSource"; } + + std::shared_ptr process_pool; + std::unique_ptr command; + QueryPipeline pipeline; + std::unique_ptr executor; + size_t rows_to_read = 0; + Poco::Logger * log; + std::vector send_data_threads; + + size_t current_read_rows = 0; + + std::mutex send_data_lock; + std::exception_ptr exception_during_send_data; +}; } diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 0362ca9ea9d..a274e820e65 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,6 +1,5 @@ #include "ExecutableDictionarySource.h" -#include #include #include #include @@ -125,8 +124,7 @@ Pipe ExecutableDictionarySource::getStreamForBlock(const Block & block) formatBlock(output_stream, block); out.close(); }}; - - std::vector tasks = {task}; + std::vector tasks = {std::move(task)}; Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process), log, std::move(tasks))); diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index f2d295b9b18..3133bc12b09 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -1,11 +1,12 @@ #pragma once -#include 
"DictionaryStructure.h" -#include "IDictionarySource.h" +#include + #include #include -namespace Poco { class Logger; } +#include +#include namespace DB diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 24bd31acd85..e3cad41c856 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -1,24 +1,21 @@ #include "ExecutablePoolDictionarySource.h" -#include -#include -#include -#include -#include +#include +#include +#include + +#include +#include + #include + #include #include #include -#include -#include -#include -#include -#include -#include -#include "DictionarySourceFactory.h" -#include "DictionarySourceHelpers.h" -#include "DictionaryStructure.h" -#include "registerDictionaries.h" + +#include +#include +#include namespace DB @@ -37,13 +34,13 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource( const Configuration & configuration_, Block & sample_block_, ContextPtr context_) - : log(&Poco::Logger::get("ExecutablePoolDictionarySource")) - , dict_struct{dict_struct_} - , configuration{configuration_} - , sample_block{sample_block_} - , context{context_} + : dict_struct(dict_struct_) + , configuration(configuration_) + , sample_block(sample_block_) + , context(context_) /// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type. - , process_pool{std::make_shared(configuration.pool_size == 0 ? std::numeric_limits::max() : configuration.pool_size)} + , process_pool(std::make_shared(configuration.pool_size == 0 ? std::numeric_limits::max() : configuration.pool_size)) + , log(&Poco::Logger::get("ExecutablePoolDictionarySource")) { /// Remove keys from sample_block for implicit_key dictionary because /// these columns will not be returned from source @@ -62,13 +59,12 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource( } ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other) - : log(&Poco::Logger::get("ExecutablePoolDictionarySource")) - , update_time{other.update_time} - , dict_struct{other.dict_struct} - , configuration{other.configuration} - , sample_block{other.sample_block} - , context{Context::createCopy(other.context)} - , process_pool{std::make_shared(configuration.pool_size)} + : dict_struct(other.dict_struct) + , configuration(other.configuration) + , sample_block(other.sample_block) + , context(Context::createCopy(other.context)) + , process_pool(std::make_shared(configuration.pool_size)) + , log(&Poco::Logger::get("ExecutablePoolDictionarySource")) { } @@ -82,123 +78,6 @@ Pipe ExecutablePoolDictionarySource::loadUpdatedAll() throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutablePoolDictionarySource does not support loadUpdatedAll method"); } -namespace -{ - /** A stream, that runs child process and sends data to its stdin in background thread, - * and receives data from its stdout. - */ - class PoolSourceWithBackgroundThread final : public SourceWithProgress - { - public: - PoolSourceWithBackgroundThread( - std::shared_ptr process_pool_, - std::unique_ptr && command_, - Pipe pipe, - size_t read_rows_, - Poco::Logger * log_, - std::function && send_data_) - : SourceWithProgress(pipe.getHeader()) - , process_pool(process_pool_) - , command(std::move(command_)) - , rows_to_read(read_rows_) - , log(log_) - , send_data(std::move(send_data_)) - , thread([this] - { - try - { - send_data(command->in); - } - catch (...) 
- { - std::lock_guard lck(exception_during_read_lock); - exception_during_read = std::current_exception(); - } - }) - { - pipeline.init(std::move(pipe)); - executor = std::make_unique(pipeline); - } - - ~PoolSourceWithBackgroundThread() override - { - if (thread.joinable()) - thread.join(); - - if (command) - process_pool->returnObject(std::move(command)); - } - - protected: - Chunk generate() override - { - rethrowExceptionDuringReadIfNeeded(); - - if (current_read_rows == rows_to_read) - return {}; - - Chunk chunk; - - try - { - if (!executor->pull(chunk)) - return {}; - - current_read_rows += chunk.getNumRows(); - } - catch (...) - { - tryLogCurrentException(log); - command = nullptr; - throw; - } - - return chunk; - } - - public: - Status prepare() override - { - auto status = SourceWithProgress::prepare(); - - if (status == Status::Finished) - { - if (thread.joinable()) - thread.join(); - - rethrowExceptionDuringReadIfNeeded(); - } - - return status; - } - - void rethrowExceptionDuringReadIfNeeded() - { - std::lock_guard lck(exception_during_read_lock); - if (exception_during_read) - { - command = nullptr; - std::rethrow_exception(exception_during_read); - } - } - - String getName() const override { return "PoolWithBackgroundThread"; } - - std::shared_ptr process_pool; - std::unique_ptr command; - QueryPipeline pipeline; - std::unique_ptr executor; - size_t rows_to_read; - Poco::Logger * log; - std::function send_data; - ThreadFromGlobalPool thread; - size_t current_read_rows = 0; - std::mutex exception_during_read_lock; - std::exception_ptr exception_during_read; - }; - -} - Pipe ExecutablePoolDictionarySource::loadIds(const std::vector & ids) { LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size()); @@ -228,19 +107,20 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Could not get process from pool, max command execution timeout exceeded ({}) seconds", + "Could not get process from pool, max command execution timeout exceeded {} seconds", configuration.max_command_execution_time); size_t rows_to_read = block.rows(); - auto format = FormatFactory::instance().getInput(configuration.format, process->out, sample_block, context, rows_to_read); + auto * process_in = &process->in; + ShellCommandPoolSource::SendDataTask task = [process_in, block, this]() mutable + { + auto & out = *process_in; + auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty()); + formatBlock(output_stream, block); + }; + std::vector tasks = {std::move(task)}; - Pipe pipe(std::make_unique( - process_pool, std::move(process), Pipe(std::move(format)), rows_to_read, log, - [block, this](WriteBufferFromFile & out) mutable - { - auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty()); - formatBlock(output_stream, block); - })); + Pipe pipe(std::make_unique(context, configuration.format, sample_block, process_pool, std::move(process), rows_to_read, log, std::move(tasks))); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index 22d7b713a24..b80122fb56f 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -1,20 +1,18 @@ #pragma once -#include +#include #include #include -#include "IDictionarySource.h" -#include 
"DictionaryStructure.h" - -namespace Poco { class Logger; } +#include +#include +#include namespace DB { -using ProcessPool = BorrowedObjectPool>; /** ExecutablePoolDictionarySource allows loading data from pool of processes. * When client requests ids or keys source get process from ProcessPool @@ -73,14 +71,13 @@ public: Pipe getStreamForBlock(const Block & block); private: - Poco::Logger * log; - time_t update_time = 0; const DictionaryStructure dict_struct; const Configuration configuration; Block sample_block; ContextPtr context; std::shared_ptr process_pool; + Poco::Logger * log; }; } diff --git a/src/Storages/ExecutablePoolSettings.cpp b/src/Storages/ExecutablePoolSettings.cpp new file mode 100644 index 00000000000..8951c8edabf --- /dev/null +++ b/src/Storages/ExecutablePoolSettings.cpp @@ -0,0 +1,42 @@ +#include "ExecutablePoolSettings.h" + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(ExecutablePoolSettingsTraits, LIST_OF_EXECUTABLE_POOL_SETTINGS); + +void ExecutablePoolSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/ExecutablePoolSettings.h b/src/Storages/ExecutablePoolSettings.h new file mode 100644 index 00000000000..6de9b0f0e6c --- /dev/null +++ b/src/Storages/ExecutablePoolSettings.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTStorage; + +#define LIST_OF_EXECUTABLE_POOL_SETTINGS(M) \ + M(UInt64, pool_size, 16, "Processes pool size. If size == 0, then there is no size restrictions", 0) \ + M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ + M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ + +DECLARE_SETTINGS_TRAITS(ExecutablePoolSettingsTraits, LIST_OF_EXECUTABLE_POOL_SETTINGS) + +/// Settings for ExecutablePool engine. 
+struct ExecutablePoolSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index a71cf20119b..6d81a4eff40 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -4,16 +4,18 @@ #include #include + #include #include #include +#include + +#include #include #include #include #include #include -#include -#include namespace DB @@ -24,6 +26,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TIMEOUT_EXCEEDED; } StorageExecutable::StorageExecutable( @@ -47,6 +50,31 @@ StorageExecutable::StorageExecutable( setInMemoryMetadata(storage_metadata); } +StorageExecutable::StorageExecutable( + const StorageID & table_id_, + const String & script_name_, + const std::vector & arguments_, + const String & format_, + const std::vector & input_queries_, + const ExecutablePoolSettings & pool_settings_, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints) + : IStorage(table_id_) + , script_name(script_name_) + , arguments(arguments_) + , format(format_) + , input_queries(input_queries_) + , pool_settings(pool_settings_) + /// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type. + , process_pool(std::make_shared(pool_settings.pool_size == 0 ? std::numeric_limits::max() : pool_settings.pool_size)) + , log(&Poco::Logger::get("StorageExecutablePool")) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns); + storage_metadata.setConstraints(constraints); + setInMemoryMetadata(storage_metadata); +} + Pipe StorageExecutable::read( const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot, @@ -56,6 +84,8 @@ Pipe StorageExecutable::read( size_t max_block_size, unsigned /*threads*/) { + std::cerr << getName() << "::read" << std::endl; + auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; if (!std::filesystem::exists(std::filesystem::path(script_path))) @@ -79,7 +109,27 @@ Pipe StorageExecutable::read( for (size_t i = 1; i < inputs.size(); ++i) config.write_fds.emplace_back(i + 2); - auto process = ShellCommand::executeDirect(config); + std::unique_ptr process; + + if (process_pool) + { + std::cerr << getName() <<"::read create process" << std::endl; + bool result = process_pool->tryBorrowObject(process, [&config, this]() + { + config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, pool_settings.command_termination_timeout }; + auto shell_command = ShellCommand::execute(config); + return shell_command; + }, pool_settings.max_command_execution_time * 10000); + + if (!result) + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, + "Could not get process from pool, max command execution timeout exceeded {} seconds", + pool_settings.max_command_execution_time); + } + else + { + process = ShellCommand::executeDirect(config); + } std::vector tasks; tasks.reserve(inputs.size()); @@ -123,13 +173,22 @@ Pipe StorageExecutable::read( } auto sample_block = metadata_snapshot->getSampleBlock(); - Pipe pipe(std::make_unique(context, format, sample_block, std::move(process), log, std::move(tasks), max_block_size)); - return pipe; + + if (process_pool) + { + Pipe pipe(std::make_unique(context, format, std::move(sample_block), process_pool, 
std::move(process), log, std::move(tasks))); + return pipe; + } + else + { + Pipe pipe(std::make_unique(context, format, std::move(sample_block), std::move(process), log, std::move(tasks), max_block_size)); + return pipe; + } } void registerStorageExecutable(StorageFactory & factory) { - factory.registerStorage("Executable", [](const StorageFactory::Arguments & args) + auto register_storage = [](const StorageFactory::Arguments & args, bool is_executable_pool) -> StoragePtr { auto local_context = args.getLocalContext(); @@ -143,7 +202,7 @@ void registerStorageExecutable(StorageFactory & factory) auto scipt_name_with_arguments_value = args.engine_args[0]->as().value.safeGet(); std::vector script_name_with_arguments; - boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c){ return c == ' '; }); + boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c) { return c == ' '; }); auto script_name = script_name_with_arguments[0]; script_name_with_arguments.erase(script_name_with_arguments.begin()); @@ -154,8 +213,8 @@ void registerStorageExecutable(StorageFactory & factory) { ASTPtr query = args.engine_args[i]->children.at(0); if (!query->as()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "StorageExecutable argument is invalid input query {}", + throw Exception( + ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument is invalid input query {}", query->formatForErrorMessage()); input_queries.emplace_back(std::move(query)); @@ -164,7 +223,35 @@ void registerStorageExecutable(StorageFactory & factory) const auto & columns = args.columns; const auto & constraints = args.constraints; - return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, columns, constraints); + if (is_executable_pool) + { + size_t max_command_execution_time = 10; + + size_t max_execution_time_seconds = static_cast(args.getContext()->getSettings().max_execution_time.totalSeconds()); + if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) + max_command_execution_time = max_execution_time_seconds; + + ExecutablePoolSettings pool_settings; + pool_settings.max_command_execution_time = max_command_execution_time; + if (args.storage_def->settings) + pool_settings.loadFromQuery(*args.storage_def); + + return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, pool_settings, columns, constraints); + } + else + { + return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, columns, constraints); + } + }; + + factory.registerStorage("Executable", [&](const StorageFactory::Arguments & args) + { + return register_storage(args, false /*is_executable_pool*/); + }); + + factory.registerStorage("ExecutablePool", [&](const StorageFactory::Arguments & args) + { + return register_storage(args, true /*is_executable_pool*/); }); } diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 033a9a35e7e..8578ec3fee7 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -3,19 +3,30 @@ #include #include #include -#include +#include +#include namespace DB { + /** * This class represents table engine for external executable files. + * Executable storage that will start process for read. + * ExecutablePool storage maintain pool of processes and take process from pool for read. 
*/ class StorageExecutable final : public shared_ptr_helper, public IStorage { friend struct shared_ptr_helper; public: - String getName() const override { return "Executable"; } + + String getName() const override + { + if (process_pool) + return "ExecutablePool"; + else + return "Executable"; + } Pipe read( const Names & column_names, @@ -36,12 +47,24 @@ protected: const ColumnsDescription & columns, const ConstraintsDescription & constraints); + StorageExecutable( + const StorageID & table_id, + const String & script_name_, + const std::vector & arguments_, + const String & format_, + const std::vector & input_queries_, + const ExecutablePoolSettings & pool_settings_, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints); + private: String script_name; std::vector arguments; String format; std::vector input_queries; + ExecutablePoolSettings pool_settings; + std::shared_ptr process_pool; Poco::Logger * log; }; -} +} diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index 24bc01a300c..3106cde90c0 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -55,3 +55,15 @@ def test_executable_storage_argument(started_cluster): node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_argument.sh 1', 'TabSeparated')") assert node.query("SELECT * FROM test_table") == 'Key 1\n' node.query("DROP TABLE test_table") + +def test_executable_pool_storage(started_cluster): + node.query("DROP TABLE IF EXISTS test_table") + node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool.sh', 'TabSeparated', (SELECT 1))") + assert node.query("SELECT * FROM test_table") == 'Key 1\n' + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_multiple_pipes(started_cluster): + node.query("DROP TABLE IF EXISTS test_table") + node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool_multiple_pipes.sh', 'TabSeparated', (SELECT 1), (SELECT 2), (SELECT 3))") + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' + node.query("DROP TABLE test_table") diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh new file mode 100755 index 00000000000..f569b2dbbaa --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +while read read_data; do printf '1'; printf "Key $read_data\n"; done diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh new file mode 100755 index 00000000000..6fcc412f86a --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +read -t 250 -u 4 read_data_from_4_fd; +read -t 250 -u 3 read_data_from_3_fd; +read -t 250 read_data_from_0_df; + +printf '3'; +printf "Key from 4 fd $read_data_from_4_fd\n"; +printf "Key from 3 fd $read_data_from_3_fd\n"; +printf "Key from 0 fd $read_data_from_0_df\n"; From 010985ce87f58d644c9a701bb0df327b4aa3f66a Mon Sep 17 00:00:00 2001 From: alesapin Date: 
Thu, 2 Sep 2021 14:54:32 +0300 Subject: [PATCH 049/125] Add comment --- src/Coordination/KeeperStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 7ad7fa6c779..1e5a74ef5eb 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1078,6 +1078,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina zxid = *new_last_zxid; } + /// ZooKeeper update sessions expirity for each request, not only for heartbeats session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]); if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special From f9ca726bf7ad3ec37680443121cf42797e87cd90 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Sep 2021 16:31:50 +0300 Subject: [PATCH 050/125] PageArena updated implementation --- src/Interpreters/JIT/CHJIT.cpp | 412 +++++++++++++++------------------ 1 file changed, 188 insertions(+), 224 deletions(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 33c52548a70..7ded01904ae 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -4,6 +4,8 @@ #include +#include + #include #include #include @@ -87,6 +89,159 @@ private: llvm::TargetMachine & target_machine; }; +/** Arena that allocate all memory with system page_size. + * All allocated pages can be protected with protection_flags using protect method. + * During destruction all allocated pages protection_flags will be reset. + */ +class PageArena : private boost::noncopyable +{ +public: + PageArena() : page_size(::getPageSize()) {} + + char * allocate(size_t size, size_t alignment) + { + /** First check if in some allocated page blocks there are enought free memory to make allocation. + * If there is no such block create it and then allocate from it. + */ + + for (size_t i = 0; i < page_blocks.size(); ++i) + { + char * result = tryAllocateFromPageBlockWithIndex(size, alignment, i); + if (result) + return result; + } + + allocateNextPageBlock(size); + size_t allocated_page_index = page_blocks.size() - 1; + char * result = tryAllocateFromPageBlockWithIndex(size, alignment, allocated_page_index); + assert(result); + + return result; + } + + inline size_t getAllocatedSize() const { return allocated_size; } + + inline size_t getPageSize() const { return page_size; } + + ~PageArena() + { + protect(PROT_READ | PROT_WRITE); + + for (auto & page_block : page_blocks) + free(page_block.base()); + } + + void protect(int protection_flags) + { + /** The code is partially based on the LLVM codebase + * The LLVM Project is under the Apache License v2.0 with LLVM Exceptions. + */ + +# if defined(__NetBSD__) && defined(PROT_MPROTECT) + protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); +# endif + + bool invalidate_cache = (protection_flags & PROT_EXEC); + + for (const auto & block : page_blocks) + { +# if defined(__arm__) || defined(__aarch64__) + /// Certain ARM implementations treat icache clear instruction as a memory read, + /// and CPU segfaults on trying to clear cache on !PROT_READ page. + /// Therefore we need to temporarily add PROT_READ for the sake of flushing the instruction caches. 
+ if (invalidate_cache && !(protection_flags & PROT_READ)) + { + int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); + InvalidateCache = false; + } +# endif + int res = mprotect(block.base(), block.blockSize(), protection_flags); + if (res != 0) + throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); + + if (invalidate_cache) + llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); + } + } + +private: + struct PageBlock + { + public: + PageBlock(void * pages_base_, size_t pages_size_, size_t page_size_) + : pages_base(pages_base_), pages_size(pages_size_), page_size(page_size_) + { + } + + inline void * base() const { return pages_base; } + inline size_t pagesSize() const { return pages_size; } + inline size_t pageSize() const { return page_size; } + inline size_t blockSize() const { return pages_size * page_size; } + + private: + void * pages_base; + size_t pages_size; + size_t page_size; + }; + + std::vector page_blocks; + + std::vector page_blocks_allocated_size; + + size_t page_size = 0; + + size_t allocated_size = 0; + + char * tryAllocateFromPageBlockWithIndex(size_t size, size_t alignment, size_t page_block_index) + { + assert(page_block_index < page_blocks.size()); + auto & pages_block = page_blocks[page_block_index]; + + size_t block_size = pages_block.blockSize(); + size_t & block_allocated_size = page_blocks_allocated_size[page_block_index]; + size_t block_free_size = block_size - block_allocated_size; + + uint8_t * pages_start = static_cast(pages_block.base()); + void * pages_offset = pages_start + block_allocated_size; + + auto * result = std::align(alignment, size, pages_offset, block_free_size); + + if (result) + { + block_allocated_size = reinterpret_cast(result) - pages_start; + return static_cast(result); + } + else + { + return nullptr; + } + } + + void allocateNextPageBlock(size_t size) + { + size_t pages_to_allocate_size = ((size / page_size) + 1) * 2; + size_t allocate_size = page_size * pages_to_allocate_size; + + void * buf = nullptr; + int res = posix_memalign(&buf, page_size, allocate_size); + + if (res != 0) + throwFromErrno( + fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", page_size, ReadableSize(allocate_size)), + ErrorCodes::CANNOT_ALLOCATE_MEMORY, + res); + + page_blocks.emplace_back(buf, pages_to_allocate_size, page_size); + page_blocks_allocated_size.emplace_back(0); + + allocated_size += allocate_size; + } +}; + // class AssemblyPrinter // { // public: @@ -111,234 +266,43 @@ private: /** MemoryManager for module. * Keep total allocated size during RuntimeDyld linker execution. - * Actual compiled code memory is stored in llvm::SectionMemoryManager member, we cannot use ZeroBase optimization here - * because it is required for llvm::SectionMemoryManager::MemoryMapper to live longer than llvm::SectionMemoryManager. 
*/ -class JITModuleMemoryManager +class JITModuleMemoryManager : public llvm::RTDyldMemoryManager { - - class PageBlock - { - public: - PageBlock(void * pages_base_, size_t pages_size_, size_t page_size_) - : pages_base(pages_base_) - , pages_size(pages_size_) - , page_size(page_size_) - {} - - inline void * base() const { return pages_base; } - inline size_t pagesSize() const { return pages_size; } - inline size_t pageSize() const { return page_size; } - inline size_t blockSize() const { return pages_size * page_size; } - - private: - void * pages_base; - size_t pages_size; - size_t page_size; - }; - - class PageArena - { - public: - - PageArena() - : page_size(::getPageSize()) - { - allocateNextPageBlock(); - } - - char * allocate(size_t size, size_t alignment) - { - for (size_t i = 0; i < page_blocks.size(); ++i) - { - char * result = allocateFromPageBlocks(size, alignment, i); - if (result) - return result; - } - - while (true) - { - allocateNextPageBlock(); - size_t allocated_page_index = page_blocks.size() - 1; - char * result = allocateFromPageBlocks(size, alignment, allocated_page_index); - if (result) - return result; - } - } - - inline size_t getAllocatedSize() const - { - return allocated_size; - } - - inline size_t getPageSize() const - { - return page_size; - } - - ~PageArena() - { - for (auto & page_block : page_blocks) - free(page_block.base()); - } - - const std::vector & getPageBlocks() const - { - return page_blocks; - } - - private: - - std::vector page_blocks; - - std::vector page_blocks_allocated_size; - - size_t page_size = 0; - - size_t allocated_size = 0; - - char * allocateFromPageBlocks(size_t size, size_t alignment, size_t page_blocks_index) - { - assert(page_blocks_index < page_blocks.size()); - auto & pages_block = page_blocks[page_blocks_index]; - - size_t block_size = pages_block.blockSize(); - size_t & block_allocated_size = page_blocks_allocated_size[page_blocks_index]; - size_t block_free_size = block_size - block_allocated_size; - - uint8_t * pages_start = static_cast(pages_block.base()); - void * pages_offset = pages_start + block_allocated_size; - - auto * result = std::align( - alignment, - size, - pages_offset, - block_free_size); - - if (result) - { - block_allocated_size = reinterpret_cast(result) - pages_start; - return static_cast(result); - } - else - { - return nullptr; - } - } - - void allocateNextPageBlock() - { - size_t pages_to_allocate_size = (page_blocks.size() * 2) + 1; - size_t allocate_size = page_size * pages_to_allocate_size; - - llvm::errs() << "PageArena::allocatoeNextPageBlock page size " << page_size << " pages_to_allocate_size " << pages_to_allocate_size << "\n"; - - void * buf = nullptr; - int res = posix_memalign(&buf, page_size, allocate_size); - - if (res != 0) - throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", - page_size, - ReadableSize(allocate_size)), - ErrorCodes::CANNOT_ALLOCATE_MEMORY, - res); - - page_blocks.emplace_back(buf, pages_to_allocate_size, page_size); - page_blocks_allocated_size.emplace_back(0); - - allocated_size += allocate_size; - } - }; - - class MemoryManager : public llvm::RTDyldMemoryManager - { - public: - uint8_t * allocateCodeSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef) override - { - return reinterpret_cast(ex_page_arena.allocate(size, alignment)); - } - - uint8_t * allocateDataSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef, bool is_read_only) override - { - if (is_read_only) - return 
reinterpret_cast(ro_page_arena.allocate(size, alignment)); - else - return reinterpret_cast(rw_page_arena.allocate(size, alignment)); - } - - bool finalizeMemory(std::string *) override - { - protectPages(ro_page_arena, PROT_READ); - protectPages(ex_page_arena, PROT_READ | PROT_EXEC); - return true; - } - - ~MemoryManager() override - { - protectPages(ro_page_arena, PROT_READ | PROT_WRITE); - protectPages(ex_page_arena, PROT_READ | PROT_WRITE); - } - - inline size_t allocatedSize() const - { - size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); - size_t code_size = ex_page_arena.getAllocatedSize(); - - return data_size + code_size; - } - private: - PageArena rw_page_arena; - PageArena ro_page_arena; - PageArena ex_page_arena; - - static void protectPages(PageArena & arena, int protection_flags) - { - /** The code is partially based on the LLVM codebase - * The LLVM Project is under the Apache License v2.0 with LLVM Exceptions. - */ - const auto & blocks = arena.getPageBlocks(); - -#if defined(__NetBSD__) && defined(PROT_MPROTECT) - protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC); -#endif - - bool invalidate_cache = (protection_flags & PROT_EXEC); - - for (const auto & block : blocks) - { -#if defined(__arm__) || defined(__aarch64__) - /// Certain ARM implementations treat icache clear instruction as a memory read, - /// and CPU segfaults on trying to clear cache on !PROT_READ page. - /// Therefore we need to temporarily add PROT_READ for the sake of flushing the instruction caches. - if (invalidate_cache && !(protection_flags & PROT_READ)) - { - int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ); - if (res != 0) - throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); - - llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); - InvalidateCache = false; - } -#endif - int res = mprotect(block.base(), block.blockSize(), protection_flags); - if (res != 0) - throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT); - - if (invalidate_cache) - llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize()); - } - } - }; - public: - JITModuleMemoryManager() = default; - inline size_t getAllocatedSize() const { return manager.allocatedSize(); } + uint8_t * allocateCodeSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef) override + { + return reinterpret_cast(ex_page_arena.allocate(size, alignment)); + } - inline llvm::RTDyldMemoryManager & getManager() { return manager; } + uint8_t * allocateDataSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef, bool is_read_only) override + { + if (is_read_only) + return reinterpret_cast(ro_page_arena.allocate(size, alignment)); + else + return reinterpret_cast(rw_page_arena.allocate(size, alignment)); + } + + bool finalizeMemory(std::string *) override + { + ro_page_arena.protect(PROT_READ); + ex_page_arena.protect(PROT_READ | PROT_EXEC); + return true; + } + + inline size_t allocatedSize() const + { + size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); + size_t code_size = ex_page_arena.getAllocatedSize(); + + return data_size + code_size; + } private: - MemoryManager manager; + PageArena rw_page_arena; + PageArena ro_page_arena; + PageArena ex_page_arena; }; class JITSymbolResolver : public llvm::LegacyJITSymbolResolver @@ -444,12 +408,12 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr module) } 
std::unique_ptr module_memory_manager = std::make_unique(); - llvm::RuntimeDyld dynamic_linker = {module_memory_manager->getManager(), *symbol_resolver}; + llvm::RuntimeDyld dynamic_linker = {*module_memory_manager, *symbol_resolver}; std::unique_ptr linked_object = dynamic_linker.loadObject(*object.get()); dynamic_linker.resolveRelocations(); - module_memory_manager->getManager().finalizeMemory(); + module_memory_manager->finalizeMemory(nullptr); CompiledModule compiled_module; @@ -470,7 +434,7 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr module) compiled_module.function_name_to_symbol.emplace(std::move(function_name), jit_symbol_address); } - compiled_module.size = module_memory_manager->getAllocatedSize(); + compiled_module.size = module_memory_manager->allocatedSize(); compiled_module.identifier = current_module_key; module_identifier_to_memory_manager[current_module_key] = std::move(module_memory_manager); From f1bc3d77207fbbc48918b037fc7d8d2ea73b4564 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Sep 2021 16:54:30 +0300 Subject: [PATCH 051/125] Add test for keeper 2 node configuration --- .../test_keeper_two_nodes_cluster/__init__.py | 1 + .../configs/enable_keeper1.xml | 33 ++++ .../configs/enable_keeper2.xml | 33 ++++ .../configs/use_keeper.xml | 12 ++ .../test_keeper_two_nodes_cluster/test.py | 169 ++++++++++++++++++ 5 files changed, 248 insertions(+) create mode 100644 tests/integration/test_keeper_two_nodes_cluster/__init__.py create mode 100644 tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper2.xml create mode 100644 tests/integration/test_keeper_two_nodes_cluster/configs/use_keeper.xml create mode 100644 tests/integration/test_keeper_two_nodes_cluster/test.py diff --git a/tests/integration/test_keeper_two_nodes_cluster/__init__.py b/tests/integration/test_keeper_two_nodes_cluster/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_two_nodes_cluster/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper1.xml b/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper1.xml new file mode 100644 index 00000000000..21601ff4cc0 --- /dev/null +++ b/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper1.xml @@ -0,0 +1,33 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + + diff --git a/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper2.xml b/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper2.xml new file mode 100644 index 00000000000..baee6b578a0 --- /dev/null +++ b/tests/integration/test_keeper_two_nodes_cluster/configs/enable_keeper2.xml @@ -0,0 +1,33 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + 75 + trace + + + + + 1 + node1 + 44444 + true + 3 + + + 2 + node2 + 44444 + true + true + 2 + + + + diff --git a/tests/integration/test_keeper_two_nodes_cluster/configs/use_keeper.xml b/tests/integration/test_keeper_two_nodes_cluster/configs/use_keeper.xml new file mode 100644 index 00000000000..740b2afaab9 --- /dev/null +++ b/tests/integration/test_keeper_two_nodes_cluster/configs/use_keeper.xml @@ -0,0 +1,12 
@@ + + + + node1 + 9181 + + + node2 + 9181 + + + diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py new file mode 100644 index 00000000000..5b71ddf7087 --- /dev/null +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from multiprocessing.dummy import Pool +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/use_keeper.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/use_keeper.xml'], stay_alive=True) + +from kazoo.client import KazooClient, KazooState + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def smaller_exception(ex): + return '\n'.join(str(ex).split('\n')[0:2]) + +def wait_node(node): + for _ in range(100): + zk = None + try: + node.query("SELECT * FROM system.zookeeper WHERE path = '/'") + zk = get_fake_zk(node.name, timeout=30.0) + zk.create("/test", sequence=True) + print("node", node.name, "ready") + break + except Exception as ex: + time.sleep(0.2) + print("Waiting until", node.name, "will be ready, exception", ex) + finally: + if zk: + zk.stop() + zk.close() + else: + raise Exception("Can't wait node", node.name, "to become ready") + +def wait_nodes(): + for node in [node1, node2]: + wait_node(node) + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_read_write_two_nodes(started_cluster): + try: + wait_nodes() + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + + node1_zk.create("/test_read_write_multinode_node1", b"somedata1") + node2_zk.create("/test_read_write_multinode_node2", b"somedata2") + + # stale reads are allowed + while node1_zk.exists("/test_read_write_multinode_node2") is None: + time.sleep(0.1) + + # stale reads are allowed + while node2_zk.exists("/test_read_write_multinode_node1") is None: + time.sleep(0.1) + + assert node2_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1" + assert node1_zk.get("/test_read_write_multinode_node1")[0] == b"somedata1" + + assert node2_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2" + assert node1_zk.get("/test_read_write_multinode_node2")[0] == b"somedata2" + + finally: + try: + for zk_conn in [node1_zk, node2_zk, node3_zk]: + zk_conn.stop() + zk_conn.close() + except: + pass + +def test_read_write_two_nodes_with_blocade(started_cluster): + try: + wait_nodes() + node1_zk = get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + + print("Blocking nodes") + with PartitionManager() as pm: + pm.partition_instances(node2, node1) + + # We will respond conection loss but process this query + # after blocade will be removed + with pytest.raises(Exception): + node1_zk.create("/test_read_write_blocked_node1", b"somedata1") + + # This node is not leader and will not process anything + with pytest.raises(Exception): + node2_zk.create("/test_read_write_blocked_node2", b"somedata2") + + + print("Nodes unblocked") + for i in range(10): + try: + node1_zk = 
get_fake_zk("node1") + node2_zk = get_fake_zk("node2") + break + except: + time.sleep(0.5) + + + for i in range(100): + try: + node1_zk.create("/test_after_block1", b"somedata12") + break + except: + time.sleep(0.1) + else: + raise Exception("node1 cannot recover after blockade") + + print("Node1 created it's value") + + for i in range(100): + try: + node2_zk.create("/test_after_block2", b"somedata12") + break + except: + time.sleep(0.1) + else: + raise Exception("node2 cannot recover after blockade") + + print("Node2 created it's value") + + assert node1_zk.exists("/test_read_write_blocked_node1") is not None + assert node2_zk.exists("/test_read_write_blocked_node1") is not None + + assert node1_zk.exists("/test_read_write_blocked_node2") is None + assert node2_zk.exists("/test_read_write_blocked_node2") is None + + # stale reads are allowed + while node1_zk.exists("/test_after_block2") is None: + time.sleep(0.1) + + # stale reads are allowed + while node2_zk.exists("/test_after_block1") is None: + time.sleep(0.1) + + assert node1_zk.exists("/test_after_block1") is not None + assert node1_zk.exists("/test_after_block2") is not None + assert node2_zk.exists("/test_after_block1") is not None + assert node2_zk.exists("/test_after_block2") is not None + + finally: + try: + for zk_conn in [node1_zk, node2_zk, node3_zk]: + zk_conn.stop() + zk_conn.close() + except: + pass From 8fbef6c69dda57b9eee814364f1eda88a9f3e5d1 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Sep 2021 18:30:55 +0300 Subject: [PATCH 052/125] Function dictGet default implementation for nulls --- src/Functions/FunctionsExternalDictionaries.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 5f94a1e1f4b..14a6430723c 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -293,7 +293,6 @@ public: size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const final { return true; } - bool useDefaultImplementationForNulls() const final { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } bool isDeterministic() const override { return false; } From ee7f50396b669734e68453c533f2203cbb31316a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 2 Sep 2021 20:22:01 +0300 Subject: [PATCH 053/125] fix nested and scalar columns with dot --- src/DataTypes/NestedUtils.cpp | 5 +++- .../02017_columns_with_dot.reference | 3 +++ .../0_stateless/02017_columns_with_dot.sql | 24 +++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02017_columns_with_dot.reference create mode 100644 tests/queries/0_stateless/02017_columns_with_dot.sql diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 94b3b2f3cf7..4f804a0ca50 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -141,7 +141,7 @@ NamesAndTypesList collect(const NamesAndTypesList & names_and_types) auto nested_types = getSubcolumnsOfNested(names_and_types); for (const auto & name_type : names_and_types) - if (!nested_types.count(splitName(name_type.name).first)) + if (!isArray(name_type.type) || !nested_types.count(splitName(name_type.name).first)) res.push_back(name_type); for (const auto & name_type : nested_types) @@ -157,6 +157,9 @@ NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types) for (auto & 
name_type : res) { + if (!isArray(name_type.type)) + continue; + auto split = splitName(name_type.name); if (name_type.isSubcolumn() || split.second.empty()) continue; diff --git a/tests/queries/0_stateless/02017_columns_with_dot.reference b/tests/queries/0_stateless/02017_columns_with_dot.reference new file mode 100644 index 00000000000..5922e56fb56 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot.reference @@ -0,0 +1,3 @@ +1 [0,0] 2 [1,1,3] +1 [0,0] 2 [1,1,3] +1 [0,0] 2 [1,1,3] diff --git a/tests/queries/0_stateless/02017_columns_with_dot.sql b/tests/queries/0_stateless/02017_columns_with_dot.sql new file mode 100644 index 00000000000..ae901214d75 --- /dev/null +++ b/tests/queries/0_stateless/02017_columns_with_dot.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_with_dots; +CREATE TABLE t_with_dots (id UInt32, arr Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) ENGINE = Log; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; + +CREATE TABLE t_with_dots (id UInt32, arr Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) +ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; + +CREATE TABLE t_with_dots (id UInt32, arr Array(UInt32), `b.id` UInt32, `b.arr` Array(UInt32)) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_with_dots VALUES (1, [0, 0], 2, [1, 1, 3]); +SELECT * FROM t_with_dots; + +DROP TABLE t_with_dots; From db7734995789c99aafdf9d82a3feddbf1dec24ab Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 2 Sep 2021 20:43:42 +0300 Subject: [PATCH 054/125] add unit test for NestedUtils --- src/DataTypes/tests/gtest_NestedUtils.cpp | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/DataTypes/tests/gtest_NestedUtils.cpp diff --git a/src/DataTypes/tests/gtest_NestedUtils.cpp b/src/DataTypes/tests/gtest_NestedUtils.cpp new file mode 100644 index 00000000000..c01758b8f0f --- /dev/null +++ b/src/DataTypes/tests/gtest_NestedUtils.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include +#include + +using namespace DB; + +GTEST_TEST(NestedUtils, collect) +{ + DataTypePtr uint_type = std::make_shared(); + DataTypePtr array_type = std::make_shared(std::make_shared()); + + const NamesAndTypesList source_columns = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b.arr1", array_type}, + {"b.arr2", array_type} + }; + + auto nested_type = createNested({uint_type, uint_type}, {"arr1", "arr2"}); + const NamesAndTypesList columns_with_subcolumns = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b", "arr1", nested_type, array_type}, + {"b", "arr2", nested_type, array_type} + }; + + const NamesAndTypesList columns_with_nested = + { + {"id", uint_type}, + {"arr1", array_type}, + {"b.id", uint_type}, + {"b", nested_type}, + }; + + ASSERT_EQ(Nested::convertToSubcolumns(source_columns).toString(), columns_with_subcolumns.toString()); + ASSERT_EQ(Nested::collect(source_columns).toString(), columns_with_nested.toString()); +} From ed70ed6f71e60adfb9f1ee5d48cb158e4642c7cb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Sep 2021 00:52:38 +0300 Subject: [PATCH 055/125] Introduce connection_no_wait setting for MySQL engine. 
This will allow to avoid superfluous sleep during query execution, since this not only not desired behavoiur, but also may hang the server, since if you will execute enough queries that will use MySQL database but will not allow enough connections (or your MySQL server is too slow) then you may run out of threads in the global thread pool. Also note that right now it is possible to get deadlock when the mysql pool is full, consider the following scenario: - you have m1 and m2 mysql tables - you have q1 and q2 queries, bot queries join m1 and m2 - q1 allocated connection for m1 but cannot allocate connection for m2 - q2 allocated connection for m2 but cannot allocate connection for m1 - but to resolve the lock one should give up on the locking while it is not possible right now... And then you got no free threads and this: # grep -h ^202 /proc/$(pgrep clickhouse-serv)/task/*/syscall | cut -d' ' -f2 | sort | uniq -c | sort -nr | head 1554 0x7ffb60b92fe8 # mutex in mysqlxx::PoolWithFailover::get 1375 0x7ffb9f1c4748 # mutex in ::PoolEntryHelper::~PoolEntryHelper from DB::MultiplexedConnections::invalidateReplica 1160 0x7ffb612918b8 # mutex in mysqlxx::PoolWithFailover::get 42 0x7ffb9f057984 # mutex in ThreadPoolImpl::worker *NOTE: 202 is a `futex` with WAIT* (Went with `syscall` because debugging 10k+ threads is not easy, and eventually it may TRAP) --- base/mysqlxx/Pool.cpp | 29 +++++++++++++++++-- base/mysqlxx/Pool.h | 2 +- base/mysqlxx/PoolWithFailover.cpp | 12 +++++--- base/mysqlxx/PoolWithFailover.h | 6 +++- .../table-engines/integrations/mysql.md | 1 + .../MySQL/MaterializedMySQLSyncThread.cpp | 4 +-- src/Storages/MySQL/MySQLSettings.h | 1 + src/Storages/StorageMySQL.cpp | 10 +++++-- 8 files changed, 51 insertions(+), 14 deletions(-) diff --git a/base/mysqlxx/Pool.cpp b/base/mysqlxx/Pool.cpp index 2f47aa67356..cee386311d4 100644 --- a/base/mysqlxx/Pool.cpp +++ b/base/mysqlxx/Pool.cpp @@ -7,10 +7,22 @@ #endif #include - #include - #include +#include + + +namespace +{ + +inline uint64_t clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) +{ + struct timespec ts; + clock_gettime(clock_type, &ts); + return uint64_t(ts.tv_sec * 1000000000LL + ts.tv_nsec); +} + +} namespace mysqlxx @@ -124,10 +136,15 @@ Pool::~Pool() } -Pool::Entry Pool::get() +Pool::Entry Pool::get(uint64_t wait_timeout) { std::unique_lock lock(mutex); + uint64_t deadline = 0; + /// UINT64_MAX -- wait indefinitely + if (wait_timeout && wait_timeout != UINT64_MAX) + deadline = clock_gettime_ns() + wait_timeout * 1'000'000'000; + initialize(); for (;;) { @@ -153,6 +170,12 @@ Pool::Entry Pool::get() logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription()); } + if (!wait_timeout) + throw Poco::Exception("mysqlxx::Pool is full (wait is disabled, see connection_wait_timeout setting)"); + + if (deadline && clock_gettime_ns() >= deadline) + throw Poco::Exception("mysqlxx::Pool is full (connection_wait_timeout is exceeded)"); + lock.unlock(); logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); diff --git a/base/mysqlxx/Pool.h b/base/mysqlxx/Pool.h index 530e2c78cf2..08d8b85b4ac 100644 --- a/base/mysqlxx/Pool.h +++ b/base/mysqlxx/Pool.h @@ -189,7 +189,7 @@ public: ~Pool(); /// Allocates connection. - Entry get(); + Entry get(uint64_t wait_timeout); /// Allocates connection. /// If database is not accessible, returns empty Entry object. 
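For reference, the user-visible knob that drives the new wait_timeout argument of Pool::get is the MySQL table engine's connection_wait_timeout setting; a hedged usage sketch follows (host, database and credentials are placeholders, the values shown are the documented defaults, and 0 disables waiting entirely so a full pool throws immediately):

    CREATE TABLE mysql_table (id UInt32, name String)
    ENGINE = MySQL('mysql-host:3306', 'remote_db', 'remote_table', 'user', 'password')
    SETTINGS connection_pool_size = 16, connection_wait_timeout = 5;
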
diff --git a/base/mysqlxx/PoolWithFailover.cpp b/base/mysqlxx/PoolWithFailover.cpp index e317ab7f228..14c0db9ecd5 100644 --- a/base/mysqlxx/PoolWithFailover.cpp +++ b/base/mysqlxx/PoolWithFailover.cpp @@ -21,8 +21,9 @@ PoolWithFailover::PoolWithFailover( const unsigned max_connections_, const size_t max_tries_) : max_tries(max_tries_) + , shareable(config_.getBool(config_name_ + ".share_connection", false)) + , wait_timeout(UINT64_MAX) { - shareable = config_.getBool(config_name_ + ".share_connection", false); if (config_.has(config_name_ + ".replica")) { Poco::Util::AbstractConfiguration::Keys replica_keys; @@ -80,9 +81,11 @@ PoolWithFailover::PoolWithFailover( const std::string & password, unsigned default_connections_, unsigned max_connections_, - size_t max_tries_) + size_t max_tries_, + uint64_t wait_timeout_) : max_tries(max_tries_) , shareable(false) + , wait_timeout(wait_timeout_) { /// Replicas have the same priority, but traversed replicas are moved to the end of the queue. for (const auto & [host, port] : addresses) @@ -101,6 +104,7 @@ PoolWithFailover::PoolWithFailover( PoolWithFailover::PoolWithFailover(const PoolWithFailover & other) : max_tries{other.max_tries} , shareable{other.shareable} + , wait_timeout(other.wait_timeout) { if (shareable) { @@ -140,7 +144,7 @@ PoolWithFailover::Entry PoolWithFailover::get() try { - Entry entry = shareable ? pool->get() : pool->tryGet(); + Entry entry = shareable ? pool->get(wait_timeout) : pool->tryGet(); if (!entry.isNull()) { @@ -172,7 +176,7 @@ PoolWithFailover::Entry PoolWithFailover::get() if (full_pool) { app.logger().error("All connections failed, trying to wait on a full pool " + (*full_pool)->getDescription()); - return (*full_pool)->get(); + return (*full_pool)->get(wait_timeout); } std::stringstream message; diff --git a/base/mysqlxx/PoolWithFailover.h b/base/mysqlxx/PoolWithFailover.h index 1c7a63e76c0..2bd5ec9f30a 100644 --- a/base/mysqlxx/PoolWithFailover.h +++ b/base/mysqlxx/PoolWithFailover.h @@ -80,6 +80,8 @@ namespace mysqlxx std::mutex mutex; /// Can the Pool be shared bool shareable; + /// Timeout for waiting free connection. + uint64_t wait_timeout = 0; public: using Entry = Pool::Entry; @@ -96,6 +98,7 @@ namespace mysqlxx * default_connections Number of connection in pool to each replica at start. * max_connections Maximum number of connections in pool to each replica. * max_tries_ Max number of connection tries. + * wait_timeout_ Timeout for waiting free connection. 
*/ PoolWithFailover( const std::string & config_name_, @@ -117,7 +120,8 @@ namespace mysqlxx const std::string & password, unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, - size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES); + size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, + uint64_t wait_timeout_ = UINT64_MAX); PoolWithFailover(const PoolWithFailover & other); diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index a6402e00bc9..7eac159a645 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -19,6 +19,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] SETTINGS [connection_pool_size=16, ] [connection_max_tries=3, ] + [connection_wait_timeout=5, ] /* 0 -- do not wait */ [connection_auto_close=true ] ; ``` diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 53495aa3cb1..560d2d716c9 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -247,7 +247,7 @@ void MaterializedMySQLSyncThread::assertMySQLAvailable() { try { - checkMySQLVariables(pool.get(), getContext()->getSettingsRef()); + checkMySQLVariables(pool.get(/* wait_timeout= */ UINT64_MAX), getContext()->getSettingsRef()); } catch (const mysqlxx::ConnectionFailed & e) { @@ -729,7 +729,7 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt { /// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file. /// To ensure that the synchronization continues, we need to handle these events - metadata.fetchMasterVariablesValue(pool.get()); + metadata.fetchMasterVariablesValue(pool.get(/* wait_timeout= */ UINT64_MAX)); client.setBinlogChecksum(metadata.binlog_checksum); } else if (receive_event->header.type != HEARTBEAT_EVENT) diff --git a/src/Storages/MySQL/MySQLSettings.h b/src/Storages/MySQL/MySQLSettings.h index da8723c2ea6..872b0607e20 100644 --- a/src/Storages/MySQL/MySQLSettings.h +++ b/src/Storages/MySQL/MySQLSettings.h @@ -17,6 +17,7 @@ class ASTStorage; #define LIST_OF_MYSQL_SETTINGS(M) \ M(UInt64, connection_pool_size, 16, "Size of connection pool (if all connections are in use, the query will wait until some connection will be freed).", 0) \ M(UInt64, connection_max_tries, 3, "Number of retries for pool with failover", 0) \ + M(UInt64, connection_wait_timeout, 5, "Timeout (in seconds) for waiting for free connection (in case of there is already connection_pool_size active connections), 0 - do not wait.", 0) \ M(Bool, connection_auto_close, true, "Auto-close connection after query execution, i.e. 
disable connection reuse.", 0) \ DECLARE_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 79bb1f59cc7..7f458ef82af 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -267,11 +267,15 @@ void registerStorageMySQL(StorageFactory & factory) throw Exception("connection_pool_size cannot be zero.", ErrorCodes::BAD_ARGUMENTS); auto addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306); - mysqlxx::PoolWithFailover pool(remote_database, addresses, - username, password, + mysqlxx::PoolWithFailover pool( + remote_database, + addresses, + username, + password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, mysql_settings.connection_pool_size, - mysql_settings.connection_max_tries); + mysql_settings.connection_max_tries, + mysql_settings.connection_wait_timeout); bool replace_query = false; std::string on_duplicate_clause; From 6d5f01a56bb1715c47de8444bfc85b39228f3081 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Sep 2021 22:32:32 +0300 Subject: [PATCH 056/125] Cover MySQL setting connection_wait_timeout --- tests/integration/test_storage_mysql/test.py | 48 ++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index a044528cacf..c7ede8dede4 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -3,7 +3,10 @@ from contextlib import contextmanager ## sudo -H pip install PyMySQL import pymysql.cursors import pytest +import time +import threading from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) @@ -319,6 +322,51 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL conn.close() +# Check that limited connection_wait_timeout (via connection_pool_size=1) will throw. 
+def test_settings_connection_wait_timeout(started_cluster): + table_name = 'test_settings_connection_wait_timeout' + node1.query(f'DROP TABLE IF EXISTS {table_name}') + wait_timeout = 2 + + conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + drop_mysql_table(conn, table_name) + create_mysql_table(conn, table_name) + + node1.query(''' + CREATE TABLE {} + ( + id UInt32, + name String, + age UInt32, + money UInt32 + ) + ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + SETTINGS connection_wait_timeout={}, connection_pool_size=1 + '''.format(table_name, table_name, wait_timeout) + ) + + node1.query("INSERT INTO {} (id, name) SELECT number, concat('name_', toString(number)) from numbers(10) ".format(table_name)) + + def worker(): + node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) + + worker_thread = threading.Thread(target=worker) + worker_thread.start() + + # ensure that first query started in worker_thread + time.sleep(1) + + started = time.time() + with pytest.raises(QueryRuntimeException, match=r"Exception: mysqlxx::Pool is full \(connection_wait_timeout is exceeded\)"): + node1.query("SELECT sleepEachRow(1) FROM {}".format(table_name)) + ended = time.time() + assert (ended - started) >= wait_timeout + + worker_thread.join() + + drop_mysql_table(conn, table_name) + conn.close() + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in list(cluster.instances.items()): From 3a3c3acd18402a75b4435ab68c44037fcfec1314 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Sep 2021 23:19:44 +0300 Subject: [PATCH 057/125] Update src/Coordination/SessionExpiryQueue.h Co-authored-by: tavplubix --- src/Coordination/SessionExpiryQueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h index 8270c8fc6c8..d8925449f2a 100644 --- a/src/Coordination/SessionExpiryQueue.h +++ b/src/Coordination/SessionExpiryQueue.h @@ -56,7 +56,7 @@ public: void addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms); /// Get all expired sessions - std::unordered_set getExpiredSessions() const; + std::vector getExpiredSessions() const; void clear(); }; From a94932983025a174d44e10b285634fa0b38ff14d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 2 Sep 2021 23:37:34 +0300 Subject: [PATCH 058/125] Review fixes --- src/Coordination/KeeperServer.cpp | 2 +- src/Coordination/KeeperServer.h | 2 +- src/Coordination/KeeperStateMachine.cpp | 2 +- src/Coordination/KeeperStateMachine.h | 2 +- src/Coordination/KeeperStorage.h | 2 +- src/Coordination/SessionExpiryQueue.cpp | 25 +++++++++---------- src/Coordination/SessionExpiryQueue.h | 3 ++- src/Coordination/tests/gtest_coordination.cpp | 4 +-- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b6b0ab8cb72..9caea5354bf 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -357,7 +357,7 @@ void KeeperServer::waitInit() throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); } -std::unordered_set KeeperServer::getDeadSessions() +std::vector KeeperServer::getDeadSessions() { return state_machine->getDeadSessions(); } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index d1138ccef1a..eb1f8437cc9 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -71,7 +71,7 @@ public: RaftAppendResult 
putRequestBatch(const KeeperStorage::RequestsForSessions & requests); /// Return set of the non-active sessions - std::unordered_set getDeadSessions(); + std::vector getDeadSessions(); bool isLeader() const; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index ffbac0656b9..9ef3c7b32f5 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -304,7 +304,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi responses_queue.push(response); } -std::unordered_set KeeperStateMachine::getDeadSessions() +std::vector KeeperStateMachine::getDeadSessions() { std::lock_guard lock(storage_and_responses_lock); return storage->getDeadSessions(); diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 32beaaf69e6..0e032e29670 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -71,7 +71,7 @@ public: /// Process local read request void processReadRequest(const KeeperStorage::RequestForSession & request_for_session); - std::unordered_set getDeadSessions(); + std::vector getDeadSessions(); void shutdownStorage(); diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 041dab05156..bc9a81bc484 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -172,7 +172,7 @@ public: } /// Get all dead sessions - std::unordered_set getDeadSessions() + std::vector getDeadSessions() { return session_expiry_queue.getExpiredSessions(); } diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp index e19a92c29d7..90c17bf904f 100644 --- a/src/Coordination/SessionExpiryQueue.cpp +++ b/src/Coordination/SessionExpiryQueue.cpp @@ -6,8 +6,8 @@ namespace DB bool SessionExpiryQueue::remove(int64_t session_id) { - auto session_it = session_to_timeout.find(session_id); - if (session_it != session_to_timeout.end()) + auto session_it = session_to_expiration_time.find(session_id); + if (session_it != session_to_expiration_time.end()) { auto set_it = expiry_to_sessions.find(session_it->second); if (set_it != expiry_to_sessions.end()) @@ -17,7 +17,7 @@ bool SessionExpiryQueue::remove(int64_t session_id) if (set_it->second.empty()) expiry_to_sessions.erase(set_it); - session_to_timeout.erase(session_it); + session_to_expiration_time.erase(session_it); return true; } @@ -31,9 +31,9 @@ void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeo /// round up to next interval int64_t new_expiry_time = roundToNextInterval(now + timeout_ms); - auto session_it = session_to_timeout.find(session_id); + auto session_it = session_to_expiration_time.find(session_id); /// We already registered this session - if (session_it != session_to_timeout.end()) + if (session_it != session_to_expiration_time.end()) { int64_t prev_expiry_time = session_it->second; session_it->second = new_expiry_time; @@ -61,7 +61,7 @@ void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeo else { /// Just add sessions to the new bucket - session_to_timeout[session_id] = new_expiry_time; + session_to_expiration_time[session_id] = new_expiry_time; auto set_it = expiry_to_sessions.find(new_expiry_time); if (set_it == expiry_to_sessions.end()) @@ -71,17 +71,16 @@ void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeo } } -std::unordered_set SessionExpiryQueue::getExpiredSessions() const +std::vector 
SessionExpiryQueue::getExpiredSessions() const { int64_t now = getNowMilliseconds(); - std::unordered_set result; + std::vector result; /// Check all buckets - for (auto it = expiry_to_sessions.begin(); it != expiry_to_sessions.end(); ++it) + for (auto & [expire_time, expired_sessions] : expiry_to_sessions) { - int64_t expire_time_for_sessions = it->first; - if (expire_time_for_sessions <= now) - result.insert(it->second.begin(), it->second.end()); + if (expire_time <= now) + result.insert(result.end(), expired_sessions.begin(), expired_sessions.end()); else break; } @@ -91,7 +90,7 @@ std::unordered_set SessionExpiryQueue::getExpiredSessions() const void SessionExpiryQueue::clear() { - session_to_timeout.clear(); + session_to_expiration_time.clear(); expiry_to_sessions.clear(); } diff --git a/src/Coordination/SessionExpiryQueue.h b/src/Coordination/SessionExpiryQueue.h index d8925449f2a..8581800834d 100644 --- a/src/Coordination/SessionExpiryQueue.h +++ b/src/Coordination/SessionExpiryQueue.h @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -20,7 +21,7 @@ class SessionExpiryQueue { private: /// Session -> timeout ms - std::unordered_map session_to_timeout; + std::unordered_map session_to_expiration_time; /// Expire time -> session expire near this time std::map> expiry_to_sessions; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 2c1cddd124b..0dfede5927d 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1383,12 +1383,12 @@ TEST(CoordinationTest, TestSessionExpiryQueue) for (size_t i = 0; i < 2; ++i) { - EXPECT_EQ(queue.getExpiredSessions(), std::unordered_set({})); + EXPECT_EQ(queue.getExpiredSessions(), std::vector({})); std::this_thread::sleep_for(std::chrono::milliseconds(400)); } std::this_thread::sleep_for(std::chrono::milliseconds(700)); - EXPECT_EQ(queue.getExpiredSessions(), std::unordered_set({1})); + EXPECT_EQ(queue.getExpiredSessions(), std::vector({1})); } From 5bab06ed908a45ee37fcba86d4b767c370162284 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Sep 2021 07:45:11 +0300 Subject: [PATCH 059/125] Bump poco to remove getpid() calls Refs: https://github.com/ClickHouse-Extras/poco/pull/46 --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index 7351c4691b5..b687c17bc2b 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da +Subproject commit b687c17bc2be36b6333a1d7cfffbf9eab65509a9 From 9281c4786b8daaadb0ca57b5059a227be48ba2ad Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 10:10:19 +0300 Subject: [PATCH 060/125] Fix queue test --- src/Coordination/SessionExpiryQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/SessionExpiryQueue.cpp b/src/Coordination/SessionExpiryQueue.cpp index 90c17bf904f..b6d3843f1d7 100644 --- a/src/Coordination/SessionExpiryQueue.cpp +++ b/src/Coordination/SessionExpiryQueue.cpp @@ -77,7 +77,7 @@ std::vector SessionExpiryQueue::getExpiredSessions() const std::vector result; /// Check all buckets - for (auto & [expire_time, expired_sessions] : expiry_to_sessions) + for (const auto & [expire_time, expired_sessions] : expiry_to_sessions) { if (expire_time <= now) result.insert(result.end(), expired_sessions.begin(), expired_sessions.end()); From a182e621f06b9b6c777bad1bce68e5ada02fdaa3 Mon Sep 17 00:00:00 2001 From: 
alesapin Date: Fri, 3 Sep 2021 11:53:26 +0300 Subject: [PATCH 061/125] Maybe it help --- tests/integration/test_keeper_two_nodes_cluster/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 5b71ddf7087..9c7fa25ab31 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -92,8 +92,8 @@ def test_read_write_two_nodes(started_cluster): def test_read_write_two_nodes_with_blocade(started_cluster): try: wait_nodes() - node1_zk = get_fake_zk("node1") - node2_zk = get_fake_zk("node2") + node1_zk = get_fake_zk("node1", timeout=5.0) + node2_zk = get_fake_zk("node2", timeout=5.0) print("Blocking nodes") with PartitionManager() as pm: From c03bcc7c0f38b077ea6602b42743041fe418e2f3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Sep 2021 11:56:51 +0300 Subject: [PATCH 062/125] Fixed tests --- src/Interpreters/JIT/CHJIT.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 7ded01904ae..7a3c1e6c940 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -213,6 +213,8 @@ private: if (result) { block_allocated_size = reinterpret_cast(result) - pages_start; + block_allocated_size += size; + return static_cast(result); } else From 7071ef80d01a404923cce6dc3708e546a17c1b92 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Sep 2021 12:06:16 +0300 Subject: [PATCH 063/125] Fixed typos --- src/Interpreters/JIT/CHJIT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 7a3c1e6c940..3f2a078f58e 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -100,7 +100,7 @@ public: char * allocate(size_t size, size_t alignment) { - /** First check if in some allocated page blocks there are enought free memory to make allocation. + /** First check if in some allocated page blocks there are enough free memory to make allocation. * If there is no such block create it and then allocate from it. */ From af7220b6aa3d5802479572afba2e39fae2d3a71e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Sep 2021 13:00:40 +0300 Subject: [PATCH 064/125] Updated ShellCommandSource --- src/DataStreams/ShellCommandSource.h | 253 ++++++------------ .../ExecutablePoolDictionarySource.cpp | 14 +- src/Storages/StorageExecutable.cpp | 29 +- src/Storages/StorageExecutable.h | 2 + .../user_scripts/test_input_process_pool.sh | 2 +- .../test_input_process_pool_multiple_pipes.sh | 2 +- 6 files changed, 105 insertions(+), 197 deletions(-) diff --git a/src/DataStreams/ShellCommandSource.h b/src/DataStreams/ShellCommandSource.h index 1559a96b456..befdbc796ba 100644 --- a/src/DataStreams/ShellCommandSource.h +++ b/src/DataStreams/ShellCommandSource.h @@ -20,14 +20,32 @@ namespace DB { -/** A stream, that get child process and sends data tasks. - * For each send data task background thread is created, send data tasks must send data to process input pipes. - * ShellCommandSource receives data from process stdout. +/** A stream, that get child process and sends data using tasks in background threads. + * For each send data task background thread is created. Send data task must send data to process input pipes. + * ShellCommandPoolSource receives data from process stdout. 
+ * + * If process_pool is passed in constructor then after source is destroyed process is returned to pool. */ + +using ProcessPool = BorrowedObjectPool>; + +struct ShellCommandSourceConfiguration +{ + /// Read fixed number of rows from command output + bool read_fixed_number_of_rows = false; + /// Valid only if read_fixed_number_of_rows = true + bool read_number_of_rows_from_process_output = false; + /// Valid only if read_fixed_number_of_rows = true + size_t number_of_rows_to_read = 0; + /// Max block size + size_t max_block_size = DBMS_DEFAULT_BUFFER_SIZE; +}; + class ShellCommandSource final : public SourceWithProgress { public: - using SendDataTask = std::function; + + using SendDataTask = std::function; ShellCommandSource( ContextPtr context, @@ -35,30 +53,52 @@ public: const Block & sample_block, std::unique_ptr && command_, Poco::Logger * log_, - std::vector && send_data_tasks, - size_t max_block_size = DEFAULT_BLOCK_SIZE) + std::vector && send_data_tasks = {}, + const ShellCommandSourceConfiguration & configuration_ = {}, + std::shared_ptr process_pool_ = nullptr) : SourceWithProgress(sample_block) , command(std::move(command_)) + , configuration(configuration_) , log(log_) + , process_pool(process_pool_) { for (auto && send_data_task : send_data_tasks) - send_data_threads.emplace_back([task = std::move(send_data_task)]() { task(); }); + { + send_data_threads.emplace_back([task = std::move(send_data_task), this]() + { + try + { + task(); + } + catch (...) + { + std::lock_guard lock(send_data_lock); + exception_during_send_data = std::current_exception(); + } + }); + } - pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size))); - executor = std::make_unique(pipeline); - } + size_t max_block_size = configuration.max_block_size; + + if (configuration.read_fixed_number_of_rows) + { + /** Currently parallel parsing input format cannot read exactly max_block_size rows from input, + * so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof. 
+ */ + auto context_for_reading = Context::createCopy(context); + context_for_reading->setSetting("input_format_parallel_parsing", false); + context = context_for_reading; + + if (configuration.read_number_of_rows_from_process_output) + { + readText(configuration.number_of_rows_to_read, command->out); + char dummy; + readChar(dummy, command->out); + } + + max_block_size = configuration.number_of_rows_to_read; + } - ShellCommandSource( - ContextPtr context, - const std::string & format, - const Block & sample_block, - std::unique_ptr && command_, - Poco::Logger * log_, - size_t max_block_size = DEFAULT_BLOCK_SIZE) - : SourceWithProgress(sample_block) - , command(std::move(command_)) - , log(log_) - { pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size))); executor = std::make_unique(pipeline); } @@ -68,155 +108,18 @@ public: for (auto & thread : send_data_threads) if (thread.joinable()) thread.join(); - } -protected: - Chunk generate() override - { - Chunk chunk; - executor->pull(chunk); - return chunk; - } - -public: - Status prepare() override - { - auto status = SourceWithProgress::prepare(); - - if (status == Status::Finished) - { - std::string err; - readStringUntilEOF(err, command->err); - if (!err.empty()) - LOG_ERROR(log, "Having stderr: {}", err); - - for (auto & thread : send_data_threads) - if (thread.joinable()) - thread.join(); - - command->wait(); - } - - return status; - } - - String getName() const override { return "ShellCommandSource"; } - -private: - - QueryPipeline pipeline; - std::unique_ptr executor; - std::unique_ptr command; - std::vector send_data_threads; - Poco::Logger * log; -}; - -/** A stream, that get child process and sends data tasks. - * For each send data task background thread is created, send data tasks must send data to process input pipes. - * ShellCommandPoolSource receives data from process stdout. - * - * Main difference with ShellCommandSource is that ShellCommandPoolSource initialized with process_pool and rows_to_read. - * Rows to read are necessary because processes in pool are not destroyed and work in read write loop. - * Source need to finish generating new chunks after rows_to_read rows are generated from process. - * - * If rows_to_read are not specified it is expected that script will output rows_to_read before other data. - * - * After source is destroyed process is returned to pool. - */ - -using ProcessPool = BorrowedObjectPool>; - -class ShellCommandPoolSource final : public SourceWithProgress -{ -public: - using SendDataTask = std::function; - - ShellCommandPoolSource( - ContextPtr context, - const std::string & format, - const Block & sample_block, - std::shared_ptr process_pool_, - std::unique_ptr && command_, - size_t rows_to_read_, - Poco::Logger * log_, - std::vector && send_data_tasks) - : SourceWithProgress(sample_block) - , process_pool(process_pool_) - , command(std::move(command_)) - , rows_to_read(rows_to_read_) - , log(log_) - { - for (auto && send_data_task : send_data_tasks) - { - send_data_threads.emplace_back([task = std::move(send_data_task), this]() - { - try - { - task(); - } - catch (...) 
- { - std::lock_guard lock(send_data_lock); - exception_during_send_data = std::current_exception(); - } - }); - } - - pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read))); - executor = std::make_unique(pipeline); - } - - ShellCommandPoolSource( - ContextPtr context, - const std::string & format, - const Block & sample_block, - std::shared_ptr process_pool_, - std::unique_ptr && command_, - Poco::Logger * log_, - std::vector && send_data_tasks) - : SourceWithProgress(sample_block) - , process_pool(process_pool_) - , command(std::move(command_)) - , log(log_) - { - for (auto && send_data_task : send_data_tasks) - { - send_data_threads.emplace_back([task = std::move(send_data_task), this]() - { - try - { - task(); - } - catch (...) - { - std::lock_guard lock(send_data_lock); - exception_during_send_data = std::current_exception(); - } - }); - } - - readText(rows_to_read, command->out); - pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read))); - executor = std::make_unique(pipeline); - } - - - ~ShellCommandPoolSource() override - { - for (auto & thread : send_data_threads) - if (thread.joinable()) - thread.join(); - - if (command) + if (command && process_pool) process_pool->returnObject(std::move(command)); } protected: + Chunk generate() override { - rethrowExceptionDuringReadIfNeeded(); + rethrowExceptionDuringSendDataIfNeeded(); - if (current_read_rows == rows_to_read) + if (configuration.read_fixed_number_of_rows && configuration.number_of_rows_to_read == current_read_rows) return {}; Chunk chunk; @@ -238,7 +141,6 @@ protected: return chunk; } -public: Status prepare() override { auto status = SourceWithProgress::prepare(); @@ -249,13 +151,17 @@ public: if (thread.joinable()) thread.join(); - rethrowExceptionDuringReadIfNeeded(); + rethrowExceptionDuringSendDataIfNeeded(); } return status; } - void rethrowExceptionDuringReadIfNeeded() + String getName() const override { return "ShellCommandSource"; } + +private: + + void rethrowExceptionDuringSendDataIfNeeded() { std::lock_guard lock(send_data_lock); if (exception_during_send_data) @@ -265,18 +171,19 @@ public: } } - String getName() const override { return "ShellCommandPoolSource"; } - - std::shared_ptr process_pool; std::unique_ptr command; - QueryPipeline pipeline; - std::unique_ptr executor; - size_t rows_to_read = 0; - Poco::Logger * log; - std::vector send_data_threads; + ShellCommandSourceConfiguration configuration; size_t current_read_rows = 0; + Poco::Logger * log; + + std::shared_ptr process_pool; + + QueryPipeline pipeline; + std::unique_ptr executor; + + std::vector send_data_threads; std::mutex send_data_lock; std::exception_ptr exception_during_send_data; }; diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index e3cad41c856..10164fe7310 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -112,15 +112,18 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) size_t rows_to_read = block.rows(); auto * process_in = &process->in; - ShellCommandPoolSource::SendDataTask task = [process_in, block, this]() mutable + ShellCommandSource::SendDataTask task = [process_in, block, this]() mutable { auto & out = *process_in; auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty()); formatBlock(output_stream, block); 
}; - std::vector tasks = {std::move(task)}; + std::vector tasks = {std::move(task)}; - Pipe pipe(std::make_unique(context, configuration.format, sample_block, process_pool, std::move(process), rows_to_read, log, std::move(tasks))); + ShellCommandSourceConfiguration command_configuration; + command_configuration.read_fixed_number_of_rows = true; + command_configuration.number_of_rows_to_read = rows_to_read; + Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process), log, std::move(tasks), command_configuration, process_pool)); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); @@ -174,11 +177,6 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix); - /** Currently parallel parsing input format cannot read exactly max_block_size rows from input, - * so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof. - */ - context->setSetting("input_format_parallel_parsing", false); - String settings_config_prefix = config_prefix + ".executable_pool"; size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10); diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 6d81a4eff40..4b0aaf6caea 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -84,8 +84,6 @@ Pipe StorageExecutable::read( size_t max_block_size, unsigned /*threads*/) { - std::cerr << getName() << "::read" << std::endl; - auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; if (!std::filesystem::exists(std::filesystem::path(script_path))) @@ -111,9 +109,9 @@ Pipe StorageExecutable::read( std::unique_ptr process; - if (process_pool) + bool is_executable_pool = (process_pool != nullptr); + if (is_executable_pool) { - std::cerr << getName() <<"::read create process" << std::endl; bool result = process_pool->tryBorrowObject(process, [&config, this]() { config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, pool_settings.command_termination_timeout }; @@ -153,7 +151,7 @@ Pipe StorageExecutable::read( write_buffer = &it->second; } - ShellCommandSource::SendDataTask task = [input_stream, write_buffer, context, this]() + ShellCommandSource::SendDataTask task = [input_stream, write_buffer, context, is_executable_pool, this]() { auto output_stream = context->getOutputStream(format, *write_buffer, input_stream->getHeader().cloneEmpty()); input_stream->readPrefix(); @@ -166,7 +164,9 @@ Pipe StorageExecutable::read( output_stream->writeSuffix(); output_stream->flush(); - write_buffer->close(); + + if (!is_executable_pool) + write_buffer->close(); }; tasks.emplace_back(std::move(task)); @@ -174,16 +174,17 @@ Pipe StorageExecutable::read( auto sample_block = metadata_snapshot->getSampleBlock(); - if (process_pool) + ShellCommandSourceConfiguration configuration; + configuration.max_block_size = max_block_size; + + if (is_executable_pool) { - Pipe pipe(std::make_unique(context, format, std::move(sample_block), process_pool, std::move(process), log, std::move(tasks))); - return pipe; - } - else - { - Pipe pipe(std::make_unique(context, format, std::move(sample_block), std::move(process), log, std::move(tasks), max_block_size)); - return 
pipe; + configuration.read_fixed_number_of_rows = true; + configuration.read_number_of_rows_from_process_output = true; } + + Pipe pipe(std::make_unique(context, format, std::move(sample_block), std::move(process), log, std::move(tasks), configuration, process_pool)); + return pipe; } void registerStorageExecutable(StorageFactory & factory) diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 8578ec3fee7..dd986ee3956 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -18,6 +18,7 @@ namespace DB class StorageExecutable final : public shared_ptr_helper, public IStorage { friend struct shared_ptr_helper; + public: String getName() const override @@ -38,6 +39,7 @@ public: unsigned threads) override; protected: + StorageExecutable( const StorageID & table_id, const String & script_name_, diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh index f569b2dbbaa..ed40a0d5291 100755 --- a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh +++ b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool.sh @@ -1,3 +1,3 @@ #!/bin/bash -while read read_data; do printf '1'; printf "Key $read_data\n"; done +while read read_data; do printf "1\n"; printf "Key $read_data\n"; done diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh index 6fcc412f86a..4408ccae756 100755 --- a/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh +++ b/tests/integration/test_executable_table_function/user_scripts/test_input_process_pool_multiple_pipes.sh @@ -4,7 +4,7 @@ read -t 250 -u 4 read_data_from_4_fd; read -t 250 -u 3 read_data_from_3_fd; read -t 250 read_data_from_0_df; -printf '3'; +printf "3\n"; printf "Key from 4 fd $read_data_from_4_fd\n"; printf "Key from 3 fd $read_data_from_3_fd\n"; printf "Key from 0 fd $read_data_from_0_df\n"; From 497c2252037f01af170078d513276dbf96d8c459 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 13:07:40 +0300 Subject: [PATCH 065/125] Test log level for CI --- base/common/logger_useful.h | 1 + contrib/poco | 2 +- programs/server/config.xml | 1 + src/Common/tests/gtest_log.cpp | 30 +++++++++++++++++++++++++ src/Coordination/KeeperStateMachine.cpp | 4 ++++ src/Core/SettingsEnums.cpp | 4 ++-- src/Core/SettingsEnums.h | 1 + tests/config/config.d/logger.xml | 11 +++++++++ tests/config/install.sh | 1 + 9 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 tests/config/config.d/logger.xml diff --git a/base/common/logger_useful.h b/base/common/logger_useful.h index d3b4d38d546..e2290a727b4 100644 --- a/base/common/logger_useful.h +++ b/base/common/logger_useful.h @@ -42,6 +42,7 @@ namespace } while (false) +#define LOG_TEST(logger, ...) LOG_IMPL(logger, DB::LogsLevel::test, Poco::Message::PRIO_TEST, __VA_ARGS__) #define LOG_TRACE(logger, ...) LOG_IMPL(logger, DB::LogsLevel::trace, Poco::Message::PRIO_TRACE, __VA_ARGS__) #define LOG_DEBUG(logger, ...) LOG_IMPL(logger, DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG, __VA_ARGS__) #define LOG_INFO(logger, ...) 
LOG_IMPL(logger, DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION, __VA_ARGS__) diff --git a/contrib/poco b/contrib/poco index 7351c4691b5..15883876a75 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da +Subproject commit 15883876a758bf6f407b22ea4ad0ad2f9465bee6 diff --git a/programs/server/config.xml b/programs/server/config.xml index 510a5e230f8..18bb23c6227 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -18,6 +18,7 @@ - information - debug - trace + - test (not for production usage) [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114 --> diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp index 9f4ef41f642..a7c49497c35 100644 --- a/src/Common/tests/gtest_log.cpp +++ b/src/Common/tests/gtest_log.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include TEST(Logger, Log) @@ -17,3 +19,31 @@ TEST(Logger, Log) /// This test checks that we don't pass this string to fmtlib, because it is the only argument. EXPECT_NO_THROW(LOG_INFO(log, "Hello {} World")); } + +TEST(Logger, TestLog) +{ + { /// Test logs visible for test level + Poco::Logger::root().setLevel("test"); + std::ostringstream oss; + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::StreamChannel(oss))); + Poco::Logger * log = &Poco::Logger::get("Log"); + LOG_TEST(log, "Hello World"); + + EXPECT_EQ(oss.str(), "Hello World\n"); + } + + { /// Test logs invisible for other levels + for (const auto & level : {"trace", "debug", "information", "warning", "fatal"}) + { + Poco::Logger::root().setLevel(level); + std::ostringstream oss; + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::StreamChannel(oss))); + Poco::Logger * log = &Poco::Logger::get("Log"); + + LOG_TEST(log, "Hello World"); + + EXPECT_EQ(oss.str(), ""); + } + } + +} diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index ffbac0656b9..ed808c15708 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -122,6 +122,10 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } else { + LOG_TEST(log, "Commit request for session {} with type {}, log id {}{}", + request_for_session.session_id, toString(request_for_session.request->getOpNum()), log_idx, + request_for_session.request->getPath().empty() ? 
"" : ", path " + request_for_session.request->getPath()); + std::lock_guard lock(storage_and_responses_lock); KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx); for (auto & response_for_session : responses_for_sessions) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 213d365ad96..8e588b62326 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -79,8 +79,8 @@ IMPLEMENT_SETTING_ENUM(LogsLevel, ErrorCodes::BAD_ARGUMENTS, {"warning", LogsLevel::warning}, {"information", LogsLevel::information}, {"debug", LogsLevel::debug}, - {"trace", LogsLevel::trace}}) - + {"trace", LogsLevel::trace}, + {"test", LogsLevel::test}}) IMPLEMENT_SETTING_ENUM_WITH_RENAME(LogQueriesType, ErrorCodes::BAD_ARGUMENTS, {{"QUERY_START", QUERY_START}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index d1dc71f621f..a699da3062c 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -94,6 +94,7 @@ enum class LogsLevel information, debug, trace, + test, }; DECLARE_SETTING_ENUM(LogsLevel) diff --git a/tests/config/config.d/logger.xml b/tests/config/config.d/logger.xml new file mode 100644 index 00000000000..5b6f976b5f8 --- /dev/null +++ b/tests/config/config.d/logger.xml @@ -0,0 +1,11 @@ + + + test + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 7dd2559e505..67cc4ef99d6 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -38,6 +38,7 @@ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/logger.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ From bb87d01579ec965478c2d05f0b98b7bbec4b5713 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 3 Sep 2021 18:49:01 +0800 Subject: [PATCH 066/125] Fix wrong header of minmax_count projection --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../0_stateless/01710_minmax_count_projection.reference | 1 + tests/queries/0_stateless/01710_minmax_count_projection.sql | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 734a67da758..92529b00faa 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -174,7 +174,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( if (query_info.projection->desc->is_minmax_count_projection) { Pipe pipe(std::make_shared( - query_info.minmax_count_projection_block, + query_info.minmax_count_projection_block.cloneEmpty(), Chunk(query_info.minmax_count_projection_block.getColumns(), query_info.minmax_count_projection_block.rows()))); auto read_from_pipe = std::make_unique(std::move(pipe)); projection_plan->addStep(std::move(read_from_pipe)); diff --git 
a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 882d808069e..ad9b87b998d 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -2,3 +2,4 @@ 0 9998 5000 1 9999 5000 0 9998 5000 +1 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 3ee19fe8c2e..58af11f01f7 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -11,4 +11,7 @@ select min(i), max(i), count() from d group by _partition_id order by _partition select min(i), max(i), count() from d where _partition_value.1 = 0 group by _partition_id order by _partition_id; select min(i), max(i), count() from d where _partition_value.1 = 10 group by _partition_id order by _partition_id; +-- fuzz crash +select min(i) from d where 1 = _partition_value.1; + drop table d; From 9c480c0de6502322a65bd35b8b0a29ee5b07a13b Mon Sep 17 00:00:00 2001 From: Artur <613623@mail.ru> Date: Fri, 3 Sep 2021 10:56:18 +0000 Subject: [PATCH 067/125] remove recursion in ZstdInflatingReadBuffer --- src/IO/ZstdInflatingReadBuffer.cpp | 59 ++++++++++++++++-------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index 6c03ea420a9..5694b333912 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -28,41 +28,46 @@ ZstdInflatingReadBuffer::~ZstdInflatingReadBuffer() bool ZstdInflatingReadBuffer::nextImpl() { - if (eof) - return false; - - if (input.pos >= input.size) + do { - in->nextIfAtEnd(); - input.src = reinterpret_cast(in->position()); - input.pos = 0; - input.size = in->buffer().end() - in->position(); - } + // If it is known that end of file was reached, return false + if (eof) + return false; - output.dst = reinterpret_cast(internal_buffer.begin()); - output.size = internal_buffer.size(); - output.pos = 0; + /// If end was reached, get next part + if (input.pos >= input.size) + { + in->nextIfAtEnd(); + input.src = reinterpret_cast(in->position()); + input.pos = 0; + input.size = in->buffer().end() - in->position(); + } - size_t ret = ZSTD_decompressStream(dctx, &output, &input); - if (ZSTD_isError(ret)) - throw Exception( - ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); + /// fill output + output.dst = reinterpret_cast(internal_buffer.begin()); + output.size = internal_buffer.size(); + output.pos = 0; - in->position() = in->buffer().begin() + input.pos; - working_buffer.resize(output.pos); + /// Decompress data and check errors. 
+ size_t ret = ZSTD_decompressStream(dctx, &output, &input); + if (ZSTD_isError(ret)) + throw Exception( + ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); - if (in->eof()) - { - eof = true; - return !working_buffer.empty(); - } - else if (output.pos == 0) - { + /// move position to the end of read data + in->position() = in->buffer().begin() + input.pos; + working_buffer.resize(output.pos); + + /// If end of file is reached, fill eof variable and return true if there is some data in buffer, otherwise return false + if (in->eof()) + { + eof = true; + return !working_buffer.empty(); + } /// It is possible, that input buffer is not at eof yet, but nothing was decompressed in current iteration. /// But there are cases, when such behaviour is not allowed - i.e. if input buffer is not eof, then /// it has to be guaranteed that working_buffer is not empty. So if it is empty, continue. - return nextImpl(); - } + } while (output.pos == 0); return true; } From 69e7e1dff7dd2a14bea037fb8facfad2ffbe8084 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 14:00:57 +0300 Subject: [PATCH 068/125] Update Dockerfile --- docker/packager/deb/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index b6f23c55aa6..22bba94f250 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -37,9 +37,7 @@ RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ RUN apt-get update \ && apt-get install \ alien \ - clang-11 \ clang-12 \ - clang-tidy-11 \ clang-tidy-12 \ cmake \ debhelper \ @@ -47,10 +45,7 @@ RUN apt-get update \ gdb \ git \ gperf \ - lld-11 \ lld-12 \ - llvm-11 \ - llvm-11-dev \ llvm-12 \ llvm-12-dev \ moreutils \ From 4fe3909d74425b0ddf168369c176889a2931caad Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 14:01:47 +0300 Subject: [PATCH 069/125] Update Dockerfile --- docker/packager/binary/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 05834fec493..f5d496ce97f 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -39,8 +39,6 @@ RUN apt-get update \ bash \ build-essential \ ccache \ - clang-11 \ - clang-tidy-11 \ cmake \ curl \ g++-10 \ @@ -50,9 +48,6 @@ RUN apt-get update \ gperf \ libicu-dev \ libreadline-dev \ - lld-11 \ - llvm-11 \ - llvm-11-dev \ clang-12 \ clang-tidy-12 \ lld-12 \ From 04e5e52dbdd4074b396b898902ad71000425ab61 Mon Sep 17 00:00:00 2001 From: Artur <613623@mail.ru> Date: Fri, 3 Sep 2021 11:18:19 +0000 Subject: [PATCH 070/125] prevent endless loop --- src/IO/ZstdInflatingReadBuffer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index 5694b333912..da6768f7c4a 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -54,6 +54,9 @@ bool ZstdInflatingReadBuffer::nextImpl() throw Exception( ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); + /// Check that something has changed after decompress (input or output position) + assert(output.pos > 0 || in->position() < in->buffer().begin() + input.pos); + /// move position to the end of read data in->position() = in->buffer().begin() + input.pos; working_buffer.resize(output.pos); From 
8b8b0043c1493bf2b506f0d0d91356190780cf56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 14:20:09 +0300 Subject: [PATCH 071/125] Fix logger conf --- tests/config/config.d/logger.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/config/config.d/logger.xml b/tests/config/config.d/logger.xml index 5b6f976b5f8..6a52c8d68a2 100644 --- a/tests/config/config.d/logger.xml +++ b/tests/config/config.d/logger.xml @@ -1,11 +1,5 @@ test - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log From 9345b3c45ed40be2e45c381f096fd7c32267649f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 14:59:21 +0300 Subject: [PATCH 072/125] Fixup flaky --- tests/integration/test_keeper_two_nodes_cluster/test.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 9c7fa25ab31..e6e3eb37af2 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -141,12 +141,6 @@ def test_read_write_two_nodes_with_blocade(started_cluster): print("Node2 created it's value") - assert node1_zk.exists("/test_read_write_blocked_node1") is not None - assert node2_zk.exists("/test_read_write_blocked_node1") is not None - - assert node1_zk.exists("/test_read_write_blocked_node2") is None - assert node2_zk.exists("/test_read_write_blocked_node2") is None - # stale reads are allowed while node1_zk.exists("/test_after_block2") is None: time.sleep(0.1) From 09a9ad0dfe3245458769f39f3caa7960d347cc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 3 Sep 2021 16:15:56 +0200 Subject: [PATCH 073/125] Improve [C|T]SV errors --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 4 ++-- src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 93c39995e34..d62cc112d1a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -322,7 +322,7 @@ bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, verbosePrintString(in.position(), in.position() + 1, out); out << " found instead.\n" " It's like your file has more columns than expected.\n" - "And if your file have right number of columns, maybe it have unquoted string value with comma.\n"; + "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; return false; } @@ -341,7 +341,7 @@ bool CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, { out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." 
" It's like your file has less columns than expected.\n" - "And if your file have right number of columns, maybe it have unescaped quotes in values.\n"; + "And if your file has the right number of columns, maybe it has unescaped quotes in values.\n"; } else { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index f89b76342a4..5d56ed1327e 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -284,7 +284,7 @@ bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & { out << "ERROR: Tab found where line feed is expected." " It's like your file has more columns than expected.\n" - "And if your file have right number of columns, maybe it have unescaped tab in value.\n"; + "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; } else if (*in.position() == '\r') { @@ -313,8 +313,8 @@ bool TabSeparatedRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumns & { out << "ERROR: Line feed found where tab is expected." " It's like your file has less columns than expected.\n" - "And if your file have right number of columns, " - "maybe it have unescaped backslash in value before tab, which cause tab has escaped.\n"; + "And if your file has the right number of columns, " + "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; } else if (*in.position() == '\r') { From 5c8e6345842abedf767dc020b39fae28dc0bb331 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 3 Sep 2021 17:46:26 +0300 Subject: [PATCH 074/125] Update StorageTableFunction.h --- src/Storages/StorageTableFunction.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index d44133a1a89..2edc988fca4 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -142,9 +142,11 @@ public: StoragePolicyPtr getStoragePolicy() const override { + std::lock_guard lock{nested_mutex}; if (nested) - return StorageProxy::getStoragePolicy(); - return IStorage::getStoragePolicy(); + assert(nested->getStoragePolicy() == {}); + /// Table functions cannot have storage policy. 
+ return {}; } private: From 65ff5fd052a0a58a674efec944aa4e6fb64bc99b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 3 Sep 2021 17:57:29 +0300 Subject: [PATCH 075/125] Rewrite test --- src/Common/tests/gtest_log.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp index a7c49497c35..e60d87a30c0 100644 --- a/src/Common/tests/gtest_log.cpp +++ b/src/Common/tests/gtest_log.cpp @@ -23,26 +23,29 @@ TEST(Logger, Log) TEST(Logger, TestLog) { { /// Test logs visible for test level - Poco::Logger::root().setLevel("test"); - std::ostringstream oss; - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::StreamChannel(oss))); - Poco::Logger * log = &Poco::Logger::get("Log"); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + auto my_channel = Poco::AutoPtr(new Poco::StreamChannel(oss)); + auto log = &Poco::Logger::create("TestLogger", my_channel.get()); + log->setLevel("test"); LOG_TEST(log, "Hello World"); EXPECT_EQ(oss.str(), "Hello World\n"); + Poco::Logger::destroy("TestLogger"); } { /// Test logs invisible for other levels - for (const auto & level : {"trace", "debug", "information", "warning", "fatal"}) + for (const auto & level : {"trace", "debug", "information", "warning", "error", "fatal"}) { - Poco::Logger::root().setLevel(level); - std::ostringstream oss; - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::StreamChannel(oss))); - Poco::Logger * log = &Poco::Logger::get("Log"); - + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + auto my_channel = Poco::AutoPtr(new Poco::StreamChannel(oss)); + auto log = &Poco::Logger::create(std::string{level} + "_Logger", my_channel.get()); + log->setLevel(level); LOG_TEST(log, "Hello World"); EXPECT_EQ(oss.str(), ""); + + Poco::Logger::destroy(std::string{level} + "_Logger"); } } From 5fd9ad3ba918ff070d925b838038aff3461a86a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 3 Sep 2021 18:19:09 +0300 Subject: [PATCH 076/125] Update version_date.tsv after release 21.6.9.7 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2341552a977..bce6aee6d90 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -9,6 +9,7 @@ v21.7.5.29-stable 2021-07-28 v21.7.4.18-stable 2021-07-17 v21.7.3.14-stable 2021-07-13 v21.7.2.7-stable 2021-07-09 +v21.6.9.7-stable 2021-09-02 v21.6.8.62-stable 2021-07-13 v21.6.7.57-stable 2021-07-09 v21.6.6.51-stable 2021-07-02 From 2486b6d53c99a18e8dfe269fbbdd69b6b410d91e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 3 Sep 2021 19:52:43 +0300 Subject: [PATCH 077/125] Update StorageTableFunction.h --- src/Storages/StorageTableFunction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 2edc988fca4..1b67f6a2aa9 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -145,7 +145,7 @@ public: std::lock_guard lock{nested_mutex}; if (nested) assert(nested->getStoragePolicy() == {}); - /// Table functions cannot have storage policy. + /// Table functions cannot have storage policy. 
return {}; } From 13466a7cc3b2c70f12d3902886ab56ed20a8f943 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Sep 2021 20:04:30 +0300 Subject: [PATCH 078/125] minor fix --- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageTableFunction.h | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index b003f8c6486..80800e50059 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -101,7 +101,7 @@ public: void flush() override; void drop() override; - bool storesDataOnDisk() const override { return true; } + bool storesDataOnDisk() const override { return data_volume != nullptr; } Strings getDataPaths() const override; ActionLock getActionLock(StorageActionBlockType type) override; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 1b67f6a2aa9..557f378ab77 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -32,7 +32,7 @@ public: setInMemoryMetadata(cached_metadata); } - StoragePtr getNested() const override + StoragePtr getNestedImpl() const { std::lock_guard lock{nested_mutex}; if (nested) @@ -46,6 +46,20 @@ public: return nested; } + StoragePtr getNested() const override + { + StoragePtr nested_storage = getNestedImpl(); + assert(!nested_storage->getStoragePolicy()); + assert(!nested_storage->storesDataOnDisk()); + return nested_storage; + } + + /// Table functions cannot have storage policy and cannot store data on disk. + /// We may check if table is readonly or stores data on disk on DROP TABLE. + /// Avoid loading nested table by returning nullptr/false for all table functions. + StoragePolicyPtr getStoragePolicy() const override { return nullptr; } + bool storesDataOnDisk() const override { return false; } + String getName() const override { std::lock_guard lock{nested_mutex}; @@ -140,15 +154,6 @@ public: bool isView() const override { return false; } void checkTableCanBeDropped() const override {} - StoragePolicyPtr getStoragePolicy() const override - { - std::lock_guard lock{nested_mutex}; - if (nested) - assert(nested->getStoragePolicy() == {}); - /// Table functions cannot have storage policy. 
- return {}; - } - private: mutable std::mutex nested_mutex; mutable GetNestedStorageFunc get_nested; From c929255ec086c30e83c5ae687369ca1f977b69cc Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 3 Sep 2021 22:08:56 +0300 Subject: [PATCH 079/125] [docs] incorporate changes requested by Chaim Haas (#28588) --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index d408a3d6849..83cbfde939d 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -25,7 +25,7 @@ toc_title: Adopters | Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | -| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloomberg | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | | Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | | Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | | CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | From c329f04b2298838e1077c1331109e0922e55a1a7 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Sep 2021 23:08:55 +0300 Subject: [PATCH 080/125] Fixed tests --- src/Functions/FunctionsExternalDictionaries.h | 66 ++++++++++++++++--- .../2014_dict_get_nullable_key.reference | 13 ++++ .../2014_dict_get_nullable_key.sql | 29 ++++++++ 3 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/2014_dict_get_nullable_key.reference create mode 100644 tests/queries/0_stateless/2014_dict_get_nullable_key.sql diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 14a6430723c..4f79b06b44a 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -293,6 +293,7 @@ public: size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const final { return true; } + bool useDefaultImplementationForNulls() const final { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } bool isDeterministic() const override { return false; } @@ -320,21 +321,32 @@ public: Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type); - DataTypes types; - auto dictionary_structure = helper.getDictionaryStructure(dictionary_name); + DataTypes attribute_types; + attribute_types.reserve(attribute_names.size()); for (auto & attribute_name : attribute_names) { /// We're extracting the return type from the dictionary's config, without loading the dictionary. 
- auto attribute = dictionary_structure.getAttribute(attribute_name); - types.emplace_back(attribute.type); + const auto & attribute = dictionary_structure.getAttribute(attribute_name); + attribute_types.emplace_back(attribute.type); } - if (types.size() > 1) - return std::make_shared(types, attribute_names); + bool key_is_nullable = arguments[2].type->isNullable(); + if (attribute_types.size() > 1) + { + if (key_is_nullable) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function {} support nullable key only for single dictionary attribute", getName()); + + return std::make_shared(attribute_types, attribute_names); + } else - return types.front(); + { + if (key_is_nullable) + return makeNullable(attribute_types.front()); + else + return attribute_types.front(); + } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -417,7 +429,9 @@ public: default_cols = tuple_column->getColumnsCopy(); } else + { default_cols.emplace_back(result); + } } else { @@ -425,7 +439,16 @@ public: default_cols.emplace_back(nullptr); } - const auto & key_col_with_type = arguments[2]; + auto key_col_with_type = arguments[2]; + + bool key_is_only_null = key_col_with_type.type->onlyNull(); + if (key_is_only_null) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + bool key_is_nullable = key_col_with_type.type->isNullable(); + if (key_is_nullable) + key_col_with_type = columnGetNested(key_col_with_type); + auto key_column = key_col_with_type.column; Columns key_columns; @@ -481,7 +504,26 @@ public: key_types.emplace_back(range_col_type); } - return executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, result_type, default_cols); + DataTypePtr attribute_type = result_type; + if (key_is_nullable) + { + DataTypes attribute_types; + attribute_types.reserve(attribute_names.size()); + for (auto & attribute_name : attribute_names) + { + const auto & attribute = dictionary->getStructure().getAttribute(attribute_name); + attribute_types.emplace_back(attribute.type); + } + + attribute_type = attribute_types.front(); + } + + auto result_column = executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, attribute_type, default_cols); + + if (key_is_nullable) + result_column = wrapInNullable(result_column, {arguments[2]}, result_type, input_rows_count); + + return result_column; } private: @@ -510,12 +552,14 @@ private: result = ColumnTuple::create(std::move(result_columns)); } else + { result = dictionary->getColumn( attribute_names[0], result_type, key_columns, key_types, default_cols.front()); + } return result; } @@ -525,7 +569,9 @@ private: Strings attribute_names; if (const auto * name_col = checkAndGetColumnConst(column.get())) + { attribute_names.emplace_back(name_col->getValue()); + } else if (const auto * tuple_col_const = checkAndGetColumnConst(column.get())) { const ColumnTuple & tuple_col = assert_cast(tuple_col_const->getDataColumn()); @@ -550,10 +596,12 @@ private: } } else + { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}, expected a const string or const tuple of const strings.", type->getName(), getName()); + } return attribute_names; } diff --git a/tests/queries/0_stateless/2014_dict_get_nullable_key.reference b/tests/queries/0_stateless/2014_dict_get_nullable_key.reference new file mode 100644 index 00000000000..08127d35829 --- /dev/null +++ 
b/tests/queries/0_stateless/2014_dict_get_nullable_key.reference @@ -0,0 +1,13 @@ +Non nullable value only null key +\N +Non nullable value nullable key +Test +\N + +Nullable value only null key +\N +Nullable value nullable key +Test +\N +\N +\N diff --git a/tests/queries/0_stateless/2014_dict_get_nullable_key.sql b/tests/queries/0_stateless/2014_dict_get_nullable_key.sql new file mode 100644 index 00000000000..d6c058b285f --- /dev/null +++ b/tests/queries/0_stateless/2014_dict_get_nullable_key.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS dictionary_non_nullable_source_table; +CREATE TABLE dictionary_non_nullable_source_table (id UInt64, value String) ENGINE=TinyLog; +INSERT INTO dictionary_non_nullable_source_table VALUES (0, 'Test'); + +DROP DICTIONARY IF EXISTS test_dictionary_non_nullable; +CREATE DICTIONARY test_dictionary_non_nullable (id UInt64, value String) PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(CLICKHOUSE(TABLE 'dictionary_non_nullable_source_table')); + +SELECT 'Non nullable value only null key '; +SELECT dictGet('test_dictionary_non_nullable', 'value', NULL); +SELECT 'Non nullable value nullable key'; +SELECT dictGet('test_dictionary_non_nullable', 'value', arrayJoin([toUInt64(0), NULL, 1])); + +DROP DICTIONARY test_dictionary_non_nullable; +DROP TABLE dictionary_non_nullable_source_table; + +DROP TABLE IF EXISTS dictionary_nullable_source_table; +CREATE TABLE dictionary_nullable_source_table (id UInt64, value Nullable(String)) ENGINE=TinyLog; +INSERT INTO dictionary_nullable_source_table VALUES (0, 'Test'), (1, NULL); + +DROP DICTIONARY IF EXISTS test_dictionary_nullable; +CREATE DICTIONARY test_dictionary_nullable (id UInt64, value Nullable(String)) PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(CLICKHOUSE(TABLE 'dictionary_nullable_source_table')); + +SELECT 'Nullable value only null key '; +SELECT dictGet('test_dictionary_nullable', 'value', NULL); +SELECT 'Nullable value nullable key'; +SELECT dictGet('test_dictionary_nullable', 'value', arrayJoin([toUInt64(0), NULL, 1, 2])); + +DROP DICTIONARY test_dictionary_nullable; +DROP TABLE dictionary_nullable_source_table; From bf6ba796f8b5df9e4a384562bb0370118c2222cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Sep 2021 21:05:13 +0300 Subject: [PATCH 081/125] Fix UUID overlap in DROP TABLE for internal DDL from MaterializeMySQL This will fix race with DatabaseCatalog::loadMarkedAsDroppedTables(), since MaterializeMySQL, and MaterializedMySQLSyncThread in background, will be started earlier then DatabaseCatalog::loadMarkedAsDroppedTables() and will move those tables to metadata_dropped, and after loadMarkedAsDroppedTables() will start and try to load partially dropped tables and will hit UUID overlap: 12:02:51.536783 [ 3026034 ] {} Application: starting up 12:02:53.019282 [ 3026034 ] {} DatabaseMaterializeMySQL (mysql): Total 9 tables and 0 dictionaries. 12:02:53.041699 [ 3026200 ] {} mysql.data (7143b65f-6982-4600-b143-b65f6982e600): Loading data parts 12:02:53.041740 [ 3026200 ] {} mysql.data (7143b65f-6982-4600-b143-b65f6982e600): There are no data parts 12:02:53.620382 [ 3026034 ] {} DatabaseMaterializeMySQL (mysql): Starting up tables. 
12:03:00.669730 [ 3026183 ] {} executeQuery: (internal) /*Materialize MySQL step 1: execute MySQL DDL for dump data*/ DROP TABLE mysql.data 12:03:00.741894 [ 3026269 ] {} DatabaseCatalog: Trying load partially dropped table mysql.data (7143b65f-6982-4600-b143-b65f6982e600) from /var/lib/clickhouse/metadata_dropped/mysql.data.7143b65f-6982-4600-b143-b65f6982e600.sql 12:03:00.742582 [ 3026269 ] {} mysql.data (7143b65f-6982-4600-b143-b65f6982e600): Loading data parts 12:03:00.742650 [ 3026269 ] {} mysql.data (7143b65f-6982-4600-b143-b65f6982e600): There are no data parts 12:03:00.773137 [ 3026034 ] {} Application: Caught exception while loading metadata: Code: 57, e.displayText() = DB::Exception: Mapping for table with UUID=7143b65f-6982-4600-b143-b65f6982e600 already exists. It happened due to UUID collision, most likely because some not random UUIDs were manually specified in CREATE queries., Stack trace (when copying this message, always include the lines below): 12:03:01.224557 [ 3026034 ] {} Application: DB::Exception: Mapping for table with UUID=7143b65f-6982-4600-b143-b65f6982e600 already exists. It happened due to UUID collision, most likely because some not random UUIDs were manually specified in CREATE queries. Cc: @zhang2014 --- programs/server/Server.cpp | 4 ++++ src/Interpreters/DatabaseCatalog.cpp | 1 - src/Interpreters/DatabaseCatalog.h | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index bf0d33d9c5c..c09b4f774fa 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1131,6 +1131,10 @@ if (ThreadFuzzer::instance().isEffective()) global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper); + /// Firstly remove partially dropped databases, to avoid race with MaterializedMySQLSyncThread, + /// that may execute DROP before loadMarkedAsDroppedTables() in background, + /// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap. + database_catalog.loadMarkedAsDroppedTables(); /// Then, load remaining databases loadMetadata(global_context, default_database); database_catalog.loadDatabases(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 6e0ca97df1d..af60eeeaba3 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -146,7 +146,6 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() void DatabaseCatalog::loadDatabases() { - loadMarkedAsDroppedTables(); auto task_holder = getContext()->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); }); drop_task = std::make_unique(std::move(task_holder)); (*drop_task)->activate(); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 787fd9dc512..72dd28d335b 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -130,6 +130,7 @@ public: void initializeAndLoadTemporaryDatabase(); void loadDatabases(); + void loadMarkedAsDroppedTables(); /// Get an object that protects the table from concurrently executing multiple DDL operations. 
DDLGuardPtr getDDLGuard(const String & database, const String & table); @@ -241,7 +242,6 @@ private: }; using TablesMarkedAsDropped = std::list; - void loadMarkedAsDroppedTables(); void dropTableDataTask(); void dropTableFinally(const TableMarkedAsDropped & table); From 69fa28f332049294f37f5b833333b4d91a3989c4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 4 Sep 2021 14:29:40 +0300 Subject: [PATCH 082/125] Fixed tests --- .../ExecutablePoolDictionarySource.cpp | 3 --- tests/config/executable_dictionary.xml | 16 ++++++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 10164fe7310..c5d081bb3e6 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -4,9 +4,6 @@ #include #include -#include -#include - #include #include diff --git a/tests/config/executable_dictionary.xml b/tests/config/executable_dictionary.xml index 6089f57a3d7..e2b0bd0663e 100644 --- a/tests/config/executable_dictionary.xml +++ b/tests/config/executable_dictionary.xml @@ -123,8 +123,8 @@ - printf "1\tValue\n" - TabSeparated + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'id UInt64' --query "SELECT id, 'Value' AS value FROM table" false @@ -156,8 +156,8 @@ - echo "Value" - TabSeparated + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'id UInt64' --query "SELECT 'Value' AS value FROM table" true @@ -197,8 +197,8 @@ - printf "1\tFirstKey\tValue\n" - TabSeparated + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'id UInt64, id_key String' --query "SELECT id, id_key, 'Value' AS value FROM table" false @@ -238,8 +238,8 @@ - echo "Value" - TabSeparated + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'id UInt64, id_key String' --query "SELECT 'Value' AS value FROM table" true From ac2d9a73a8d62d72114b1ab27a656c922b7f05be Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 4 Sep 2021 13:07:59 +0300 Subject: [PATCH 083/125] User managed slots --- .../materialized-postgresql.md | 14 ++++- docs/en/operations/settings/settings.md | 10 +++- .../DatabaseMaterializedPostgreSQL.cpp | 6 +- .../MaterializedPostgreSQLSettings.h | 2 + .../PostgreSQLReplicationHandler.cpp | 57 ++++++++++++++----- .../PostgreSQL/PostgreSQLReplicationHandler.h | 10 ++-- .../StorageMaterializedPostgreSQL.cpp | 6 +- .../test.py | 41 ++++++++++++- 8 files changed, 119 insertions(+), 27 deletions(-) diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 89c7c803bb3..77a5f2af0e0 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -31,6 +31,10 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p - [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update) +- [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) + +- [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) + ``` sql CREATE DATABASE database1 ENGINE = 
MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
@@ -73,7 +77,7 @@ WHERE oid = 'postgres_table'::regclass;
 
 !!! warning "Warning"
     Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.
-    
+
 ## Example of Use {#example-of-use}
 
 ``` sql
@@ -82,3 +86,11 @@ ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres
 
 SELECT * FROM postgresql_db.postgres_table;
 ```
+
+## Notes {#notes}
+
+- Failover of the logical replication slot.
+
+Logical replication slots which exist on the primary are not available on standby replicas.
+So if there is a failover, the new primary (the old physical standby) won’t be aware of any slots which existed on the old primary. This will lead to broken replication from PostgreSQL.
+A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass the slot name via the `materialized_postgresql_replication_slot` setting, and the slot has to be created with the `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via the `materialized_postgresql_snapshot` setting.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index a1c7d1aab32..5635321b598 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3436,6 +3436,14 @@ Possible values:
 
 Default value: `0`.
 
+## materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot}
+
+Allows having user-managed replication slots. Must be used together with `materialized_postgresql_snapshot`.
+
+## materialized_postgresql_snapshot {#materialized-postgresql-snapshot}
+
+A text string identifying a snapshot, from which the initial dump of tables will be performed. Must be used together with `materialized_postgresql_replication_slot`.
+
 ## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
 
 Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries.
@@ -3449,7 +3457,7 @@ Default value: `0`.
 
 ## force_optimize_projection {#force-optimize-projection}
 
-Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting). 
+Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Possible values: diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index c9ea8d12ef2..218dda94d31 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -61,10 +61,8 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() connection_info, getContext(), is_attach, - settings->materialized_postgresql_max_block_size.value, - settings->materialized_postgresql_allow_automatic_update, - /* is_materialized_postgresql_database = */ true, - settings->materialized_postgresql_tables_list.value); + *settings, + /* is_materialized_postgresql_database = */ true); postgres::Connection connection(connection_info); NameSet tables_to_replicate; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h index 1d986b223e9..cc147a01d32 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h @@ -17,6 +17,8 @@ namespace DB M(UInt64, materialized_postgresql_max_block_size, 65536, "Number of row collected before flushing data into table.", 0) \ M(String, materialized_postgresql_tables_list, "", "List of tables for MaterializedPostgreSQL database engine", 0) \ M(Bool, materialized_postgresql_allow_automatic_update, false, "Allow to reload table in the background, when schema changes are detected", 0) \ + M(String, materialized_postgresql_replication_slot, "", "A user-created replication slot", 0) \ + M(String, materialized_postgresql_snapshot, "", "User provided snapshot in case he manages replication slots himself", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index c8c74d2ddaa..1bda6d13e11 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -32,24 +32,28 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( const postgres::ConnectionInfo & connection_info_, ContextPtr context_, bool is_attach_, - const size_t max_block_size_, - bool allow_automatic_update_, - bool is_materialized_postgresql_database_, - const String tables_list_) + const MaterializedPostgreSQLSettings & replication_settings, + bool is_materialized_postgresql_database_) : log(&Poco::Logger::get("PostgreSQLReplicationHandler")) , context(context_) , is_attach(is_attach_) , remote_database_name(remote_database_name_) , current_database_name(current_database_name_) , connection_info(connection_info_) - , max_block_size(max_block_size_) - , allow_automatic_update(allow_automatic_update_) + , max_block_size(replication_settings.materialized_postgresql_max_block_size) + , allow_automatic_update(replication_settings.materialized_postgresql_allow_automatic_update) , is_materialized_postgresql_database(is_materialized_postgresql_database_) - , tables_list(tables_list_) + , tables_list(replication_settings.materialized_postgresql_tables_list) + , user_provided_snapshot(replication_settings.materialized_postgresql_snapshot) , connection(std::make_shared(connection_info_)) , milliseconds_to_wait(RESCHEDULE_MS) { - replication_slot = fmt::format("{}_ch_replication_slot", replication_identifier); + replication_slot = 
replication_settings.materialized_postgresql_replication_slot; + if (replication_slot.empty()) + { + user_managed_slot = false; + replication_slot = fmt::format("{}_ch_replication_slot", replication_identifier); + } publication_name = fmt::format("{}_ch_publication", replication_identifier); startup_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ waitConnectionAndStart(); }); @@ -121,7 +125,20 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) auto initial_sync = [&]() { - createReplicationSlot(tx, start_lsn, snapshot_name); + LOG_TRACE(log, "Starting tables sync load"); + + if (user_managed_slot) + { + if (user_provided_snapshot.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Using a user-defined replication slot must be provided with a snapshot from EXPORT SNAPSHOT when the slot is created." + "Pass it to `materialized_postgresql_snapshot` setting"); + snapshot_name = user_provided_snapshot; + } + else + { + createReplicationSlot(tx, start_lsn, snapshot_name); + } for (const auto & [table_name, storage] : materialized_storages) { @@ -147,12 +164,17 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) /// Recreation of a replication slot imposes reloading of all tables. if (!isReplicationSlotExist(tx, start_lsn, /* temporary */false)) { + if (user_managed_slot) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Having replication slot `{}` from settings, but it does not exist", replication_slot); + initial_sync(); } /// Always drop replication slot if it is CREATE query and not ATTACH. else if (!is_attach || new_publication) { - dropReplicationSlot(tx); + if (!user_managed_slot) + dropReplicationSlot(tx); + initial_sync(); } /// Synchronization and initial load already took place - do not create any new tables, just fetch StoragePtr's @@ -376,6 +398,8 @@ bool PostgreSQLReplicationHandler::isReplicationSlotExist(pqxx::nontransaction & void PostgreSQLReplicationHandler::createReplicationSlot( pqxx::nontransaction & tx, String & start_lsn, String & snapshot_name, bool temporary) { + assert(temporary || !user_managed_slot); + String query_str, slot_name; if (temporary) slot_name = replication_slot + "_tmp"; @@ -401,6 +425,8 @@ void PostgreSQLReplicationHandler::createReplicationSlot( void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx, bool temporary) { + assert(temporary || !user_managed_slot); + std::string slot_name; if (temporary) slot_name = replication_slot + "_tmp"; @@ -433,14 +459,17 @@ void PostgreSQLReplicationHandler::shutdownFinal() connection->execWithRetry([&](pqxx::nontransaction & tx) { - if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */false)) - dropReplicationSlot(tx, /* temporary */false); + if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */true)) + dropReplicationSlot(tx, /* temporary */true); }); + if (user_managed_slot) + return; + connection->execWithRetry([&](pqxx::nontransaction & tx) { - if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */true)) - dropReplicationSlot(tx, /* temporary */true); + if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */false)) + dropReplicationSlot(tx, /* temporary */false); }); } catch (Exception & e) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 3a0bedc0852..eacf6b69b3b 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ 
b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -1,6 +1,7 @@ #pragma once #include "MaterializedPostgreSQLConsumer.h" +#include "MaterializedPostgreSQLSettings.h" #include #include @@ -25,10 +26,8 @@ public: const postgres::ConnectionInfo & connection_info_, ContextPtr context_, bool is_attach_, - const size_t max_block_size_, - bool allow_automatic_update_, - bool is_materialized_postgresql_database_, - const String tables_list = ""); + const MaterializedPostgreSQLSettings & replication_settings, + bool is_materialized_postgresql_database_); /// Activate task to be run from a separate thread: wait until connection is available and call startReplication(). void startup(); @@ -108,6 +107,9 @@ private: /// A coma-separated list of tables, which are going to be replicated for database engine. By default, a whole database is replicated. String tables_list; + bool user_managed_slot = true; + String user_provided_snapshot; + String replication_slot, publication_name; /// Shared between replication_consumer and replication_handler, but never accessed concurrently. diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index aa27a54cdac..73a685af9b4 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -64,6 +64,8 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( setInMemoryMetadata(storage_metadata); String replication_identifier = remote_database_name + "_" + remote_table_name_; + replication_settings->materialized_postgresql_tables_list = remote_table_name_; + replication_handler = std::make_unique( replication_identifier, remote_database_name, @@ -71,8 +73,8 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( connection_info, getContext(), is_attach, - replication_settings->materialized_postgresql_max_block_size.value, - /* allow_automatic_update */ false, /* is_materialized_postgresql_database */false); + *replication_settings, + /* is_materialized_postgresql_database */false); if (!is_attach) { diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 68b42d91fb6..1e7188458a9 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -31,18 +31,30 @@ postgres_table_template_3 = """ key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) """ -def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database'): +def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): if database == True: conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format(ip, port, database_name) else: conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format(ip, port) + if replication: + conn_string += " replication='database'" + conn = psycopg2.connect(conn_string) if auto_commit: conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) conn.autocommit = True return conn +def create_replication_slot(conn, slot_name='user_slot'): + cursor = conn.cursor() + cursor.execute('CREATE_REPLICATION_SLOT {} LOGICAL pgoutput EXPORT_SNAPSHOT'.format(slot_name)) + result = cursor.fetchall() + print(result[0][0]) # slot name + print(result[0][1]) # start lsn + 
print(result[0][2]) # snapshot + return result[0][2] + def create_postgres_db(cursor, name='postgres_database'): cursor.execute("CREATE DATABASE {}".format(name)) @@ -941,6 +953,33 @@ def test_quoting(started_cluster): drop_materialized_db() +def test_user_managed_slots(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + table_name = 'test_table' + create_postgres_table(cursor, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) + + slot_name = 'user_slot' + replication_connection = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + database=True, replication=True, auto_commit=True) + snapshot = create_replication_slot(replication_connection, slot_name=slot_name) + create_materialized_db(ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=["materialized_postgresql_replication_slot = '{}'".format(slot_name), + "materialized_postgresql_snapshot = '{}'".format(snapshot)]) + check_tables_are_synchronized(table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)".format(table_name)) + check_tables_are_synchronized(table_name); + instance.restart_clickhouse() + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(20000, 10000)".format(table_name)) + check_tables_are_synchronized(table_name); + drop_postgres_table(cursor, table_name) + drop_materialized_db() + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 8e9089f15aac34faa7c667f737e3a6455e76492e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 4 Sep 2021 15:14:15 +0300 Subject: [PATCH 084/125] Update version_date.tsv after release 21.3.16.5 --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index bce6aee6d90..c8885521437 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -26,6 +26,7 @@ v21.4.6.55-stable 2021-04-30 v21.4.5.46-stable 2021-04-24 v21.4.4.30-stable 2021-04-16 v21.4.3.21-stable 2021-04-12 +v21.3.16.5-lts 2021-09-03 v21.3.15.4-stable 2021-07-10 v21.3.14.1-lts 2021-07-01 v21.3.13.9-lts 2021-06-22 From bcbc830b0512970de78e86e2037ec0d510e636be Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 4 Sep 2021 15:28:11 +0300 Subject: [PATCH 085/125] Function dictGet small fix --- src/Functions/FunctionsExternalDictionaries.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 4f79b06b44a..73810a306d6 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -104,9 +104,11 @@ public: const auto * attr_name_col = checkAndGetColumnConst(sample_columns.getByPosition(1).column.get()); if (!attr_name_col) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet... 
must be a constant string"); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet must be a constant string"); - return getDictionary(dict_name_col->getValue())->isInjective(attr_name_col->getValue()); + const auto dictionary_name = dict_name_col->getValue(); + const auto attribute_name = attr_name_col->getValue(); + return getDictionary(dictionary_name)->isInjective(attribute_name); } DictionaryStructure getDictionaryStructure(const String & dictionary_name) const @@ -585,14 +587,14 @@ private: { const auto * tuple_column = tuple_col.getColumnPtr(i).get(); - const auto * attribute_name_column = checkAndGetColumn(tuple_column); + const auto * attribute_name_column = checkAndGetColumnConst(tuple_column); if (!attribute_name_column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Tuple second argument of function {} must contain multiple constant string columns", getName()); - attribute_names.emplace_back(attribute_name_column->getDataAt(0)); + attribute_names.emplace_back(attribute_name_column->getValue()); } } else From 327bd44f23502226b3657ed415afe51b26b3354c Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 4 Sep 2021 15:46:41 +0300 Subject: [PATCH 086/125] Fix gtest --- src/Common/tests/gtest_log.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp index e60d87a30c0..4769b8c0261 100644 --- a/src/Common/tests/gtest_log.cpp +++ b/src/Common/tests/gtest_log.cpp @@ -26,7 +26,7 @@ TEST(Logger, TestLog) std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM auto my_channel = Poco::AutoPtr(new Poco::StreamChannel(oss)); - auto log = &Poco::Logger::create("TestLogger", my_channel.get()); + auto * log = &Poco::Logger::create("TestLogger", my_channel.get()); log->setLevel("test"); LOG_TEST(log, "Hello World"); @@ -39,7 +39,7 @@ TEST(Logger, TestLog) { std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM auto my_channel = Poco::AutoPtr(new Poco::StreamChannel(oss)); - auto log = &Poco::Logger::create(std::string{level} + "_Logger", my_channel.get()); + auto * log = &Poco::Logger::create(std::string{level} + "_Logger", my_channel.get()); log->setLevel(level); LOG_TEST(log, "Hello World"); From 5f9952d7420d9f173c210abb27d829d0f9708c1e Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 4 Sep 2021 15:56:19 +0300 Subject: [PATCH 087/125] Add clang-tidy-12 --- cmake/analysis.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/analysis.cmake b/cmake/analysis.cmake index 267bb34248b..24d8168e2c0 100644 --- a/cmake/analysis.cmake +++ b/cmake/analysis.cmake @@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY) message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") endif() - find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8") + find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8") if (CLANG_TIDY_PATH) message(STATUS From a879c907a228041a94e9dee387a56e5083c273fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 4 Sep 2021 16:00:42 +0300 Subject: [PATCH 088/125] Update PVS hashsum --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 7bd45ba4018..35e07748845 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update --yes \ ENV PKG_VERSION="pvs-studio-latest" 
RUN set -x \ - && export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \ + && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \ && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \ && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \ && apt-key add /tmp/pubkey.txt \ From 4df6fa1ae7739f9229a149a239c1a3c681d78b69 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 4 Sep 2021 16:09:12 +0300 Subject: [PATCH 089/125] Remove strange changes --- docker/packager/binary/build.sh | 23 +++++++---------------- docker/packager/packager | 3 --- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 26f83649762..71402a2fd66 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -2,24 +2,15 @@ set -x -e -if [ "1" == "${IS_CROSS_DARWIN:0}" ] -then - mkdir -p build/cmake/toolchain/darwin-x86_64 - tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 - ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 -fi +mkdir -p build/cmake/toolchain/darwin-x86_64 +tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1 +ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64 -if [ "1" == "${IS_CROSS_ARM:0}" ] -then - mkdir -p build/cmake/toolchain/linux-aarch64 - tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 -fi +mkdir -p build/cmake/toolchain/linux-aarch64 +tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1 -if [ "1" == "${IS_CROSS_ARM:0}" ] -then - mkdir -p build/cmake/toolchain/freebsd-x86_64 - tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 -fi +mkdir -p build/cmake/toolchain/freebsd-x86_64 +tar xJf freebsd-11.3-toolchain.tar.xz -C build/cmake/toolchain/freebsd-x86_64 --strip-components=1 # Uncomment to debug ccache. Don't put ccache log in /output right away, or it # will be confusingly packed into the "performance" package. 
diff --git a/docker/packager/packager b/docker/packager/packager index 1f472ed98e1..f37d64e9949 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -78,7 +78,6 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append("-DLINKER_NAME=ld.lld") if is_cross_darwin: - result.append("IS_CROSS_DARWIN=1") cc = compiler[:-len(DARWIN_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/x86_64-apple-darwin-ar") cmake_flags.append("-DCMAKE_INSTALL_NAME_TOOL=/cctools/bin/x86_64-apple-darwin-install_name_tool") @@ -93,11 +92,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append("-DLINKER_NAME=/cctools/bin/aarch64-apple-darwin-ld") cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-aarch64.cmake") elif is_cross_arm: - result.append("IS_CROSS_ARM=1") cc = compiler[:-len(ARM_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") elif is_cross_freebsd: - result.append("IS_CROSS_FREEBSD=1") cc = compiler[:-len(FREEBSD_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake") else: From 685d061c4ed91332f9b0f8f398b302d823e2add6 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 4 Sep 2021 22:13:20 +0300 Subject: [PATCH 090/125] Fixes tests --- src/Functions/FunctionsExternalDictionaries.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 73810a306d6..cccf42e470c 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -587,14 +587,14 @@ private: { const auto * tuple_column = tuple_col.getColumnPtr(i).get(); - const auto * attribute_name_column = checkAndGetColumnConst(tuple_column); + const auto * attribute_name_column = checkAndGetColumn(tuple_column); if (!attribute_name_column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Tuple second argument of function {} must contain multiple constant string columns", getName()); - attribute_names.emplace_back(attribute_name_column->getValue()); + attribute_names.emplace_back(attribute_name_column->getDataAt(0)); } } else From 28517e57fc04427b007bbeddc17741afd39ea90a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 4 Sep 2021 23:55:59 +0300 Subject: [PATCH 091/125] Fix test --- .../test_postgresql_replica_database_engine/test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/test_postgresql_replica_database_engine/test.py b/tests/integration/test_postgresql_replica_database_engine/test.py index 1e7188458a9..1dd096087ff 100644 --- a/tests/integration/test_postgresql_replica_database_engine/test.py +++ b/tests/integration/test_postgresql_replica_database_engine/test.py @@ -55,6 +55,9 @@ def create_replication_slot(conn, slot_name='user_slot'): print(result[0][2]) # snapshot return result[0][2] +def drop_replication_slot(conn, slot_name='user_slot'): + cursor = conn.cursor() + cursor.execute("select pg_drop_replication_slot('{}')".format(slot_name)) def create_postgres_db(cursor, name='postgres_database'): cursor.execute("CREATE DATABASE {}".format(name)) @@ -978,6 +981,7 @@ def test_user_managed_slots(started_cluster): check_tables_are_synchronized(table_name); drop_postgres_table(cursor, table_name) drop_materialized_db() + drop_replication_slot(replication_connection, slot_name) if __name__ == '__main__': From 
3c166df8ea1e0c44483262d588eaec2be81735d4 Mon Sep 17 00:00:00 2001 From: feng lv Date: Sun, 5 Sep 2021 07:25:52 +0000 Subject: [PATCH 092/125] fix minor typo --- src/Storages/StorageMemory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 6823f661984..6ec3c6c8ac6 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -61,7 +61,7 @@ protected: const Block & src = (*data)[current_index]; Columns columns; - columns.reserve(columns.size()); + columns.reserve(column_names_and_types.size()); /// Add only required columns to `res`. for (const auto & elem : column_names_and_types) From b40b4b0b4ddd16e7792a88acd0b7cf6a2c4e2d11 Mon Sep 17 00:00:00 2001 From: kirillikoff Date: Sun, 5 Sep 2021 14:37:24 +0300 Subject: [PATCH 093/125] DOCSUP-13418: Document the regexp_max_matches_per_row setting (#28398) * Document the regexp_max_matches_per_row setting * Update docs/en/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Document the regexp_max_matches_per_row setting ru version * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> Co-authored-by: Tatiana Kirillova Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/operations/settings/settings.md | 10 ++++++++++ docs/ru/operations/settings/settings.md | 12 +++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index a1c7d1aab32..5d162f6a426 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3457,3 +3457,13 @@ Possible values: - 1 — Projection optimization is obligatory. Default value: `0`. + +## regexp_max_matches_per_row {#regexp-max-matches-per-row} + +Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using greedy regular expression in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md#extractallgroups-horizontal) function. + +Possible values: + +- Positive integer. + +Default value: `1000`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index aac9c30658c..56b04e81a94 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -3273,4 +3273,14 @@ SETTINGS index_granularity = 8192 │ - 0 — Проекции используются опционально. - 1 — Проекции обязательно используются. -Значение по умолчанию: `0`. \ No newline at end of file +Значение по умолчанию: `0`. + +## regexp_max_matches_per_row {#regexp-max-matches-per-row} + +Задает максимальное количество совпадений для регулярного выражения. Настройка применяется для защиты памяти от перегрузки при использовании "жадных" квантификаторов в регулярном выражении для функции [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md#extractallgroups-horizontal). + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: `1000`. 
\ No newline at end of file From 40d4d64a65e10739b2975e8055490a2a03d0e688 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 5 Sep 2021 15:50:25 +0300 Subject: [PATCH 094/125] Fix PVS Image --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 35e07748845..438f0bd07ec 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -38,7 +38,7 @@ RUN set -x \ && dpkg -i "${PKG_VERSION}.deb" CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ - && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \ + && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-12 -DCMAKE_CXX_COMPILER=clang\+\+-12 \ && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ cp /repo_folder/pvs-studio.log /test_output; \ From 04f0efaaf4177e496b9d7146d1b0450b627f415d Mon Sep 17 00:00:00 2001 From: George Date: Mon, 6 Sep 2021 02:06:06 +0300 Subject: [PATCH 095/125] first draft --- docs/en/operations/settings/settings.md | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5d162f6a426..221bdf0e720 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -260,6 +260,39 @@ If an error occurred while reading rows but the error counter is still less than If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. +## input_format_parquet_import_nested {#input_format_parquet_import_nested} + +Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [Parquet](../../interfaces/formats.md#data-format-parquet) input format as an array of structs. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + +## input_format_arrow_import_nested {#input_format_arrow_import_nested} + +Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format as an array of structs. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + +## input_format_orc_import_nested {#input_format_orc_import_nested} + +Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [ORC](../../interfaces/formats.md#data-format-orc) input format as an array of structs. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section. 
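A brief, hedged illustration of the three nested-import settings documented in the patch above — a minimal sketch only: the table, column, and engine names are hypothetical and are not part of this patch series; they merely show how such format settings are typically toggled per session before loading data.

``` sql
-- Hypothetical target table with a Nested column; engine and names are illustrative only.
CREATE TABLE IF NOT EXISTS nested_target
(
    `n` Nested(id UInt32, name String)
)
ENGINE = Memory;

-- Assuming the defaults of 0 stated in the documentation above, enabling these settings
-- lets an "array of structs" column from a Parquet/Arrow/ORC file be inserted into `n`,
-- e.g. when piping a file into: INSERT INTO nested_target FORMAT Parquet
SET input_format_parquet_import_nested = 1;
SET input_format_arrow_import_nested = 1;
SET input_format_orc_import_nested = 1;
```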
From 18a7adf0fa17667ef03829d122d88ac23cf93d71 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 29 Aug 2021 16:49:30 +0800 Subject: [PATCH 096/125] Fix NOT-IN index optimization when not all keys are used. --- src/Interpreters/Set.cpp | 8 ++++---- src/Interpreters/Set.h | 4 +++- .../01891_not_in_partition_prune.reference | 2 ++ .../0_stateless/01891_not_in_partition_prune.sql | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 5ab59ba3f07..5304859aeea 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); } -MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_) - : indexes_mapping(std::move(index_mapping_)) +MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_) + : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_)) { std::sort(indexes_mapping.begin(), indexes_mapping.end(), [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r) @@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, break; } } - if (one_element_range) + if (one_element_range && has_all_keys) { /// Here we know that there is one element in range. /// The main difference with the normal case is that we can definitely say that - /// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0). + /// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0). /// Check if it's an empty range if (!left_included || !right_included) diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 727a2c144a1..578913dd0d2 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -208,7 +208,7 @@ public: std::vector functions; }; - MergeTreeSetIndex(const Columns & set_elements, std::vector && index_mapping_); + MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_); size_t size() const { return ordered_set.at(0)->size(); } @@ -217,6 +217,8 @@ public: BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types) const; private: + // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element. 
+ bool has_all_keys; Columns ordered_set; std::vector indexes_mapping; diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.reference b/tests/queries/0_stateless/01891_not_in_partition_prune.reference index 628053cd4f8..9d2517ad760 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.reference +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.reference @@ -4,3 +4,5 @@ 7 107 8 108 9 109 +1970-01-01 1 one +1970-01-01 3 three diff --git a/tests/queries/0_stateless/01891_not_in_partition_prune.sql b/tests/queries/0_stateless/01891_not_in_partition_prune.sql index edbfad93e5d..5bf90fdd65c 100644 --- a/tests/queries/0_stateless/01891_not_in_partition_prune.sql +++ b/tests/queries/0_stateless/01891_not_in_partition_prune.sql @@ -8,3 +8,18 @@ set max_rows_to_read = 5; select * from test1 where i not in (1,2,3,4,5) order by i; drop table test1; + +drop table if exists t1; +drop table if exists t2; + +create table t1 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; +create table t2 (date Date, a Float64, b String) Engine=MergeTree ORDER BY date; + +insert into t1(a, b) values (1, 'one'), (2, 'two'); +insert into t2(a, b) values (2, 'two'), (3, 'three'); + +select date, a, b from t1 where (date, a, b) NOT IN (select date,a,b from t2); +select date, a, b from t2 where (date, a, b) NOT IN (select date,a,b from t1); + +drop table t1; +drop table t2; From 7b8101f289b33bce483a45bc6f85737225a644b2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Sep 2021 09:09:35 +0300 Subject: [PATCH 097/125] Minor change --- src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index b43e7656084..1fc279bff23 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -625,9 +625,8 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot() tryLogCurrentException(__PRETTY_FUNCTION__); return false; } - catch (const pqxx::broken_connection & e) + catch (const pqxx::broken_connection &) { - LOG_ERROR(log, "Connection error: {}", e.what()); connection->tryUpdateConnection(); return false; } @@ -641,6 +640,7 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot() if (error_message.find("out of relcache_callback_list slots") == std::string::npos) tryLogCurrentException(__PRETTY_FUNCTION__); + connection->tryUpdateConnection(); return false; } catch (const pqxx::conversion_error & e) From 73ef1233efbd301060e33b170c60c01f80e8bac8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 5 Sep 2021 16:44:14 +0300 Subject: [PATCH 098/125] Fix tidy Fix tidy one more time --- .clang-tidy | 2 ++ src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/IO/WriteBufferFromFile.cpp | 6 ++--- src/IO/WriteBufferFromFile.h | 1 - src/IO/WriteBufferFromFileDescriptor.cpp | 26 +++++++++---------- src/IO/WriteBufferFromFileDescriptor.h | 25 ++++++++++++------ 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b0971418e0e..ecb8ac6dcbf 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -203,3 +203,5 @@ CheckOptions: value: CamelCase - key: readability-identifier-naming.UsingCase value: CamelCase + - key: modernize-loop-convert.UseCxx20ReverseRanges + value: false diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp 
index 6b921fb9c0a..ba3f12c32a0 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -113,7 +113,7 @@ namespace DB std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key) { - std::string_view salt(""); // No salt: derive keys in a deterministic manner. + std::string_view salt; // No salt: derive keys in a deterministic manner. std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key"); std::array result; diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 67cd7ba27d6..270882d0774 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -32,7 +32,7 @@ WriteBufferFromFile::WriteBufferFromFile( mode_t mode, char * existing_memory, size_t alignment) - : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) + : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -65,9 +65,7 @@ WriteBufferFromFile::WriteBufferFromFile( size_t buf_size, char * existing_memory, size_t alignment) - : - WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), - file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) + : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, original_file_name) { fd_ = -1; } diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index b7d58638113..584a0221f1a 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -25,7 +25,6 @@ namespace DB class WriteBufferFromFile : public WriteBufferFromFileDescriptor { protected: - std::string file_name; CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; public: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index cd265653bb9..f1afca171d2 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -61,7 +61,9 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), + + /// Don't use getFileName() here because this method can be called from destructor + throwFromErrnoWithPath("Cannot write to file " + file_name, file_name, ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } @@ -74,19 +76,17 @@ void WriteBufferFromFileDescriptor::nextImpl() } -/// Name or some description of file. -std::string WriteBufferFromFileDescriptor::getFileName() const -{ - return "(fd = " + toString(fd) + ")"; -} - - WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, char * existing_memory, - size_t alignment) - : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} + size_t alignment, + const std::string & file_name_) + : WriteBufferFromFileBase(buf_size, existing_memory, alignment) + , fd(fd_) + , file_name(file_name_.empty() ? 
"(fd = " + toString(fd) + ")" : file_name_) +{ +} WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() @@ -115,7 +115,7 @@ void WriteBufferFromFileDescriptor::sync() } -off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) +off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT { off_t res = lseek(fd, offset, whence); if (-1 == res) @@ -125,7 +125,7 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) } -void WriteBufferFromFileDescriptor::truncate(off_t length) +void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT { int res = ftruncate(fd, length); if (-1 == res) @@ -133,7 +133,7 @@ void WriteBufferFromFileDescriptor::truncate(off_t length) } -off_t WriteBufferFromFileDescriptor::size() +off_t WriteBufferFromFileDescriptor::size() const { struct stat buf; int res = fstat(fd, &buf); diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index 18c0ac64f63..aef332b38b0 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -13,17 +13,17 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase protected: int fd; + /// If file has name contains filename, otherwise contains string "(fd=...)" + std::string file_name; + void nextImpl() override; - - /// Name or some description of file. - std::string getFileName() const override; - public: WriteBufferFromFileDescriptor( int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + const std::string & file_name_ = ""); /** Could be used before initialization if needed 'fd' was not passed to constructor. * It's not possible to change 'fd' during work. @@ -42,10 +42,19 @@ public: void sync() override; - off_t seek(off_t offset, int whence); - void truncate(off_t length); + /// clang-tidy wants these methods to be const, but + /// they are not const semantically + off_t seek(off_t offset, int whence); // NOLINT + void truncate(off_t length); // NOLINT - off_t size(); + /// Name or some description of file. + std::string getFileName() const override + { + return file_name; + } + + + off_t size() const; }; } From 2e5e017d6d9adbab69de388eb5cad88bfcf4310b Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 10:52:20 +0300 Subject: [PATCH 099/125] Revert "Fix tidy" This reverts commit 73ef1233efbd301060e33b170c60c01f80e8bac8. --- .clang-tidy | 2 -- src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/IO/WriteBufferFromFile.cpp | 6 +++-- src/IO/WriteBufferFromFile.h | 1 + src/IO/WriteBufferFromFileDescriptor.cpp | 26 +++++++++---------- src/IO/WriteBufferFromFileDescriptor.h | 25 ++++++------------ 6 files changed, 27 insertions(+), 35 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index ecb8ac6dcbf..b0971418e0e 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -203,5 +203,3 @@ CheckOptions: value: CamelCase - key: readability-identifier-naming.UsingCase value: CamelCase - - key: modernize-loop-convert.UseCxx20ReverseRanges - value: false diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index ba3f12c32a0..6b921fb9c0a 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -113,7 +113,7 @@ namespace DB std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key) { - std::string_view salt; // No salt: derive keys in a deterministic manner. 
+ std::string_view salt(""); // No salt: derive keys in a deterministic manner. std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key"); std::array result; diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 270882d0774..67cd7ba27d6 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -32,7 +32,7 @@ WriteBufferFromFile::WriteBufferFromFile( mode_t mode, char * existing_memory, size_t alignment) - : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_name_) + : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -65,7 +65,9 @@ WriteBufferFromFile::WriteBufferFromFile( size_t buf_size, char * existing_memory, size_t alignment) - : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, original_file_name) + : + WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), + file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) { fd_ = -1; } diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 584a0221f1a..b7d58638113 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -25,6 +25,7 @@ namespace DB class WriteBufferFromFile : public WriteBufferFromFileDescriptor { protected: + std::string file_name; CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; public: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index f1afca171d2..cd265653bb9 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -61,9 +61,7 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - - /// Don't use getFileName() here because this method can be called from destructor - throwFromErrnoWithPath("Cannot write to file " + file_name, file_name, + throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } @@ -76,17 +74,19 @@ void WriteBufferFromFileDescriptor::nextImpl() } +/// Name or some description of file. +std::string WriteBufferFromFileDescriptor::getFileName() const +{ + return "(fd = " + toString(fd) + ")"; +} + + WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, char * existing_memory, - size_t alignment, - const std::string & file_name_) - : WriteBufferFromFileBase(buf_size, existing_memory, alignment) - , fd(fd_) - , file_name(file_name_.empty() ? 
"(fd = " + toString(fd) + ")" : file_name_) -{ -} + size_t alignment) + : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() @@ -115,7 +115,7 @@ void WriteBufferFromFileDescriptor::sync() } -off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT +off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) { off_t res = lseek(fd, offset, whence); if (-1 == res) @@ -125,7 +125,7 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT } -void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT +void WriteBufferFromFileDescriptor::truncate(off_t length) { int res = ftruncate(fd, length); if (-1 == res) @@ -133,7 +133,7 @@ void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT } -off_t WriteBufferFromFileDescriptor::size() const +off_t WriteBufferFromFileDescriptor::size() { struct stat buf; int res = fstat(fd, &buf); diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index aef332b38b0..18c0ac64f63 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -13,17 +13,17 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase protected: int fd; - /// If file has name contains filename, otherwise contains string "(fd=...)" - std::string file_name; - void nextImpl() override; + + /// Name or some description of file. + std::string getFileName() const override; + public: WriteBufferFromFileDescriptor( int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0, - const std::string & file_name_ = ""); + size_t alignment = 0); /** Could be used before initialization if needed 'fd' was not passed to constructor. * It's not possible to change 'fd' during work. @@ -42,19 +42,10 @@ public: void sync() override; - /// clang-tidy wants these methods to be const, but - /// they are not const semantically - off_t seek(off_t offset, int whence); // NOLINT - void truncate(off_t length); // NOLINT + off_t seek(off_t offset, int whence); + void truncate(off_t length); - /// Name or some description of file. 
- std::string getFileName() const override - { - return file_name; - } - - - off_t size() const; + off_t size(); }; } From 181bf0dbd65ca82ae5553b474be9b6a7cc710118 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 6 Sep 2021 11:11:45 +0300 Subject: [PATCH 100/125] Fix arcadia build --- src/IO/ya.make | 3 ++- src/IO/ya.make.in | 3 ++- src/Storages/ya.make | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/IO/ya.make b/src/IO/ya.make index 4723d8a0a17..cc24a690308 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -5,6 +5,7 @@ LIBRARY() ADDINCL( contrib/libs/zstd/include + contrib/libs/lz4 contrib/restricted/fast_float/include ) @@ -14,10 +15,10 @@ PEERDIR( contrib/libs/brotli/enc contrib/libs/poco/NetSSL_OpenSSL contrib/libs/zstd + contrib/libs/lz4 contrib/restricted/fast_float ) - SRCS( AIO.cpp AsynchronousReadBufferFromFile.cpp diff --git a/src/IO/ya.make.in b/src/IO/ya.make.in index 3c5460a5378..21e64baa10a 100644 --- a/src/IO/ya.make.in +++ b/src/IO/ya.make.in @@ -4,6 +4,7 @@ LIBRARY() ADDINCL( contrib/libs/zstd/include + contrib/libs/lz4 contrib/restricted/fast_float/include ) @@ -13,10 +14,10 @@ PEERDIR( contrib/libs/brotli/enc contrib/libs/poco/NetSSL_OpenSSL contrib/libs/zstd + contrib/libs/lz4 contrib/restricted/fast_float ) - SRCS( ) diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 0daea968af5..5b246cf5aca 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -18,6 +18,7 @@ SRCS( Distributed/DirectoryMonitor.cpp Distributed/DistributedSettings.cpp Distributed/DistributedSink.cpp + ExecutablePoolSettings.cpp IStorage.cpp IndicesDescription.cpp JoinSettings.cpp From d1e91a786056901104eef98b530b54a37b62416d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 12:16:52 +0300 Subject: [PATCH 101/125] Revert "Revert "Fix tidy"" This reverts commit 2e5e017d6d9adbab69de388eb5cad88bfcf4310b. --- .clang-tidy | 2 ++ src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/IO/WriteBufferFromFile.cpp | 6 ++--- src/IO/WriteBufferFromFile.h | 1 - src/IO/WriteBufferFromFileDescriptor.cpp | 26 +++++++++---------- src/IO/WriteBufferFromFileDescriptor.h | 25 ++++++++++++------ 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index b0971418e0e..ecb8ac6dcbf 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -203,3 +203,5 @@ CheckOptions: value: CamelCase - key: readability-identifier-naming.UsingCase value: CamelCase + - key: modernize-loop-convert.UseCxx20ReverseRanges + value: false diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 6b921fb9c0a..ba3f12c32a0 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -113,7 +113,7 @@ namespace DB std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key) { - std::string_view salt(""); // No salt: derive keys in a deterministic manner. + std::string_view salt; // No salt: derive keys in a deterministic manner. 
std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key"); std::array result; diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index 67cd7ba27d6..270882d0774 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -32,7 +32,7 @@ WriteBufferFromFile::WriteBufferFromFile( mode_t mode, char * existing_memory, size_t alignment) - : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_) + : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -65,9 +65,7 @@ WriteBufferFromFile::WriteBufferFromFile( size_t buf_size, char * existing_memory, size_t alignment) - : - WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment), - file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) + : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, original_file_name) { fd_ = -1; } diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index b7d58638113..584a0221f1a 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -25,7 +25,6 @@ namespace DB class WriteBufferFromFile : public WriteBufferFromFileDescriptor { protected: - std::string file_name; CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; public: diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index cd265653bb9..f1afca171d2 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -61,7 +61,9 @@ void WriteBufferFromFileDescriptor::nextImpl() if ((-1 == res || 0 == res) && errno != EINTR) { ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); - throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(), + + /// Don't use getFileName() here because this method can be called from destructor + throwFromErrnoWithPath("Cannot write to file " + file_name, file_name, ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } @@ -74,19 +76,17 @@ void WriteBufferFromFileDescriptor::nextImpl() } -/// Name or some description of file. -std::string WriteBufferFromFileDescriptor::getFileName() const -{ - return "(fd = " + toString(fd) + ")"; -} - - WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, char * existing_memory, - size_t alignment) - : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {} + size_t alignment, + const std::string & file_name_) + : WriteBufferFromFileBase(buf_size, existing_memory, alignment) + , fd(fd_) + , file_name(file_name_.empty() ? 
"(fd = " + toString(fd) + ")" : file_name_) +{ +} WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor() @@ -115,7 +115,7 @@ void WriteBufferFromFileDescriptor::sync() } -off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) +off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT { off_t res = lseek(fd, offset, whence); if (-1 == res) @@ -125,7 +125,7 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) } -void WriteBufferFromFileDescriptor::truncate(off_t length) +void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT { int res = ftruncate(fd, length); if (-1 == res) @@ -133,7 +133,7 @@ void WriteBufferFromFileDescriptor::truncate(off_t length) } -off_t WriteBufferFromFileDescriptor::size() +off_t WriteBufferFromFileDescriptor::size() const { struct stat buf; int res = fstat(fd, &buf); diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index 18c0ac64f63..aef332b38b0 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -13,17 +13,17 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase protected: int fd; + /// If file has name contains filename, otherwise contains string "(fd=...)" + std::string file_name; + void nextImpl() override; - - /// Name or some description of file. - std::string getFileName() const override; - public: WriteBufferFromFileDescriptor( int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, - size_t alignment = 0); + size_t alignment = 0, + const std::string & file_name_ = ""); /** Could be used before initialization if needed 'fd' was not passed to constructor. * It's not possible to change 'fd' during work. @@ -42,10 +42,19 @@ public: void sync() override; - off_t seek(off_t offset, int whence); - void truncate(off_t length); + /// clang-tidy wants these methods to be const, but + /// they are not const semantically + off_t seek(off_t offset, int whence); // NOLINT + void truncate(off_t length); // NOLINT - off_t size(); + /// Name or some description of file. + std::string getFileName() const override + { + return file_name; + } + + + off_t size() const; }; } From 5c75b93fe8b4842fccbe0570514b00680066ac4a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 12:17:31 +0300 Subject: [PATCH 102/125] Revert one warning --- src/Compression/CompressionCodecEncrypted.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index ba3f12c32a0..47f93eb6202 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -113,7 +113,8 @@ namespace DB std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key) { - std::string_view salt; // No salt: derive keys in a deterministic manner. + /// No salt: derive keys in a deterministic manner. 
+ std::string_view salt(""); // NOLINT std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key"); std::array result; From 209b748fcae2f429be40d4582ceff21b7e6a85e4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 13:11:19 +0300 Subject: [PATCH 103/125] Add missed level --- src/Interpreters/TextLog.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index baf98b6771d..51ffbdd66ee 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -26,7 +26,8 @@ NamesAndTypesList TextLogElement::getNamesAndTypes() {"Notice", static_cast(Message::PRIO_NOTICE)}, {"Information", static_cast(Message::PRIO_INFORMATION)}, {"Debug", static_cast(Message::PRIO_DEBUG)}, - {"Trace", static_cast(Message::PRIO_TRACE)} + {"Trace", static_cast(Message::PRIO_TRACE)}, + {"Test", static_cast(Message::PRIO_TEST)}, }); return From fcfe77cb5741de5edccd6c713582893f7d2e24f8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 13:35:37 +0300 Subject: [PATCH 104/125] Fix allocation in buffer --- src/IO/WriteBufferFromFileDescriptor.cpp | 22 ++++++++++++++++++---- src/IO/WriteBufferFromFileDescriptor.h | 8 ++------ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index f1afca171d2..96579626dfc 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -63,7 +63,10 @@ void WriteBufferFromFileDescriptor::nextImpl() ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed); /// Don't use getFileName() here because this method can be called from destructor - throwFromErrnoWithPath("Cannot write to file " + file_name, file_name, + String error_file_name = file_name; + if (error_file_name.empty()) + error_file_name = "(fd = " + toString(fd) + ")"; + throwFromErrnoWithPath("Cannot write to file " + error_file_name, error_file_name, ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); } @@ -75,16 +78,18 @@ void WriteBufferFromFileDescriptor::nextImpl() ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written); } - +/// NOTE: This class can be used as a very low-level building block, for example +/// in trace collector. In such places allocations of memory can be dangerous, +/// so don't allocate anything in this consturctor. WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, char * existing_memory, size_t alignment, - const std::string & file_name_) + std::string file_name_) : WriteBufferFromFileBase(buf_size, existing_memory, alignment) , fd(fd_) - , file_name(file_name_.empty() ? 
"(fd = " + toString(fd) + ")" : file_name_) + , file_name(std::move(file_name_)) { } @@ -142,4 +147,13 @@ off_t WriteBufferFromFileDescriptor::size() const return buf.st_size; } +std::string WriteBufferFromFileDescriptor::getFileName() const +{ + if (file_name.empty()) + return "(fd = " + toString(fd) + ")"; + + return file_name; +} + + } diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index aef332b38b0..cad45067548 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -23,7 +23,7 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0, - const std::string & file_name_ = ""); + std::string file_name_ = ""); /** Could be used before initialization if needed 'fd' was not passed to constructor. * It's not possible to change 'fd' during work. @@ -48,11 +48,7 @@ public: void truncate(off_t length); // NOLINT /// Name or some description of file. - std::string getFileName() const override - { - return file_name; - } - + std::string getFileName() const override; off_t size() const; }; From 9db10a7164e0e22a3749d360244da995a38bec44 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 6 Sep 2021 14:34:17 +0300 Subject: [PATCH 105/125] Fix typo: --- src/IO/WriteBufferFromFileDescriptor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index 96579626dfc..38aaa945362 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -80,7 +80,7 @@ void WriteBufferFromFileDescriptor::nextImpl() /// NOTE: This class can be used as a very low-level building block, for example /// in trace collector. In such places allocations of memory can be dangerous, -/// so don't allocate anything in this consturctor. +/// so don't allocate anything in this constructor. WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor( int fd_, size_t buf_size, From 47f46e388da63be9eef8fc8f8da57e70d8e0d690 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 6 Sep 2021 16:54:26 +0300 Subject: [PATCH 106/125] fix optimization of disjunctions chain --- .../LogicalExpressionsOptimizer.cpp | 24 +++++++------------ .../02023_transform_or_to_in.reference | 2 ++ .../0_stateless/02023_transform_or_to_in.sql | 15 ++++++++++++ 3 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 tests/queries/0_stateless/02023_transform_or_to_in.reference create mode 100644 tests/queries/0_stateless/02023_transform_or_to_in.sql diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index c0d5a16fa65..936ed0149d2 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -225,22 +225,19 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain /// 1. Create a new IN expression based on information from the OR-chain. - /// Construct a list of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN` - ASTPtr value_list = std::make_shared(); + /// Construct a tuple of literals `x1, ..., xN` from the string `expr = x1 OR ... 
OR expr = xN` + + Tuple tuple; + tuple.reserve(equality_functions.size()); + for (const auto * function : equality_functions) { const auto & operands = getFunctionOperands(function); - value_list->children.push_back(operands[1]); + tuple.push_back(operands[1]->as()->value); } /// Sort the literals so that they are specified in the same order in the IN expression. - /// Otherwise, they would be specified in the order of the ASTLiteral addresses, which is nondeterministic. - std::sort(value_list->children.begin(), value_list->children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs) - { - const auto * val_lhs = lhs->as(); - const auto * val_rhs = rhs->as(); - return val_lhs->value < val_rhs->value; - }); + std::sort(tuple.begin(), tuple.end()); /// Get the expression `expr` from the chain `expr = x1 OR ... OR expr = xN` ASTPtr equals_expr_lhs; @@ -250,14 +247,11 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain equals_expr_lhs = operands[0]; } - auto tuple_function = std::make_shared(); - tuple_function->name = "tuple"; - tuple_function->arguments = value_list; - tuple_function->children.push_back(tuple_function->arguments); + auto tuple_literal = std::make_shared(std::move(tuple)); ASTPtr expression_list = std::make_shared(); expression_list->children.push_back(equals_expr_lhs); - expression_list->children.push_back(tuple_function); + expression_list->children.push_back(tuple_literal); /// Construct the expression `expr IN (x1, ..., xN)` auto in_function = std::make_shared(); diff --git a/tests/queries/0_stateless/02023_transform_or_to_in.reference b/tests/queries/0_stateless/02023_transform_or_to_in.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02023_transform_or_to_in.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02023_transform_or_to_in.sql b/tests/queries/0_stateless/02023_transform_or_to_in.sql new file mode 100644 index 00000000000..c4ceeb76931 --- /dev/null +++ b/tests/queries/0_stateless/02023_transform_or_to_in.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS t_transform_or; + +CREATE TABLE t_transform_or(B AggregateFunction(uniq, String), A String) Engine=MergeTree ORDER BY (A); + +INSERT INTO t_transform_or SELECT uniqState(''), '0'; + +SELECT uniqMergeIf(B, (A = '1') OR (A = '2') OR (A = '3')) +FROM cluster(test_cluster_two_shards, currentDatabase(), t_transform_or) +SETTINGS legacy_column_name_of_tuple_literal = 0; + +SELECT uniqMergeIf(B, (A = '1') OR (A = '2') OR (A = '3')) +FROM cluster(test_cluster_two_shards, currentDatabase(), t_transform_or) +SETTINGS legacy_column_name_of_tuple_literal = 1; + +DROP TABLE t_transform_or; From 7bea8200d80b24022c285b252258805a5427e3d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Sep 2021 19:18:09 +0000 Subject: [PATCH 107/125] Revert "Merge pull request #28082 from zhongyuankai/add-system-of-table_views" This reverts commit e5bcfba89e379c458ba15475421fdbcf8614cd0e, reversing changes made to d77f2436517712e0d3678533cd464c60fe5a0aed. 
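A short SQL sketch of the rewrite adjusted by [PATCH 106/125] above (fix optimization of disjunctions chain); the table and column names are placeholders, and the threshold setting named in the comments is an assumption rather than something shown in the diff.

```sql
-- Illustration only, not part of the patch series.
-- Assuming optimize_min_equality_disjunction_chain_length = 3 (the default),
-- a chain of equality comparisons on the same column such as
SELECT count() FROM t WHERE a = '1' OR a = '2' OR a = '3';

-- is rewritten by LogicalExpressionsOptimizer into a single IN expression:
SELECT count() FROM t WHERE a IN ('1', '2', '3');

-- The fix above builds that right-hand side as one sorted Tuple literal
-- instead of a tuple() function call, which the new uniqMergeIf test checks
-- over a two-shard cluster with legacy_column_name_of_tuple_literal 0 and 1.
```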
--- src/Interpreters/DatabaseCatalog.cpp | 6 -- src/Interpreters/DatabaseCatalog.h | 1 - src/Storages/System/StorageSystemViews.cpp | 68 ------------------- src/Storages/System/StorageSystemViews.h | 24 ------- src/Storages/System/attachSystemTables.cpp | 2 - .../0_stateless/02015_system_views.reference | 1 - .../0_stateless/02015_system_views.sql | 14 ---- tests/queries/skip_list.json | 3 +- 8 files changed, 1 insertion(+), 118 deletions(-) delete mode 100644 src/Storages/System/StorageSystemViews.cpp delete mode 100644 src/Storages/System/StorageSystemViews.h delete mode 100644 tests/queries/0_stateless/02015_system_views.reference delete mode 100644 tests/queries/0_stateless/02015_system_views.sql diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index af60eeeaba3..20ebc0a9ee5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -617,12 +617,6 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const return Dependencies(iter->second.begin(), iter->second.end()); } -ViewDependencies DatabaseCatalog::getViewDependencies() const -{ - std::lock_guard lock{databases_mutex}; - return ViewDependencies(view_dependencies.begin(), view_dependencies.end()); -} - void DatabaseCatalog::updateDependency(const StorageID & old_from, const StorageID & old_where, const StorageID & new_from, const StorageID & new_where) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 72dd28d335b..071b80690df 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -175,7 +175,6 @@ public: void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); Dependencies getDependencies(const StorageID & from) const; - ViewDependencies getViewDependencies() const; /// For Materialized and Live View void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where); diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp deleted file mode 100644 index 0bb2724b358..00000000000 --- a/src/Storages/System/StorageSystemViews.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -NamesAndTypesList StorageSystemViews::getNamesAndTypes() -{ - auto view_type_datatype = std::make_shared(DataTypeEnum8::Values{ - {"Default", static_cast(QueryViewsLogElement::ViewType::DEFAULT)}, - {"Materialized", static_cast(QueryViewsLogElement::ViewType::MATERIALIZED)}, - {"Live", static_cast(QueryViewsLogElement::ViewType::LIVE)}}); - - return { - {"database", std::make_shared()}, - {"name", std::make_shared()}, - {"main_dependency_database", std::make_shared()}, - {"main_dependency_table", std::make_shared()}, - {"view_type", std::move(view_type_datatype)}, - }; -} - -void StorageSystemViews::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const -{ - const auto access = context->getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - for (const auto & [table_id, view_ids] : DatabaseCatalog::instance().getViewDependencies()) - { - const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, table_id.database_name); - - if (check_access_for_tables && 
!access->isGranted(AccessType::SHOW_TABLES, table_id.database_name, table_id.table_name)) - continue; - - size_t col_num; - for (const auto & view_id : view_ids) - { - auto view_ptr = DatabaseCatalog::instance().getTable(view_id, context); - QueryViewsLogElement::ViewType type = QueryViewsLogElement::ViewType::DEFAULT; - - if (typeid_cast(view_ptr.get())) - { - type = QueryViewsLogElement::ViewType::MATERIALIZED; - } - else if (typeid_cast(view_ptr.get())) - { - type = QueryViewsLogElement::ViewType::LIVE; - } - - col_num = 0; - res_columns[col_num++]->insert(view_id.database_name); - res_columns[col_num++]->insert(view_id.table_name); - res_columns[col_num++]->insert(table_id.database_name); - res_columns[col_num++]->insert(table_id.table_name); - res_columns[col_num++]->insert(type); - } - } -} - -} diff --git a/src/Storages/System/StorageSystemViews.h b/src/Storages/System/StorageSystemViews.h deleted file mode 100644 index 67fcb79067e..00000000000 --- a/src/Storages/System/StorageSystemViews.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class StorageSystemViews final : public shared_ptr_helper, public IStorageSystemOneBlock -{ - friend struct shared_ptr_helper; -protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; - -public: - std::string getName() const override { return "SystemViews"; } - - static NamesAndTypesList getNamesAndTypes(); - -}; - -} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 3656a239adb..95e86487073 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -44,7 +44,6 @@ #include #include #include -#include #include #include #include @@ -96,7 +95,6 @@ void attachSystemTablesLocal(IDatabase & system_database) attach(system_database, "zeros_mt", true); attach(system_database, "databases"); attach(system_database, "tables"); - attach(system_database, "views"); attach(system_database, "columns"); attach(system_database, "functions"); attach(system_database, "events"); diff --git a/tests/queries/0_stateless/02015_system_views.reference b/tests/queries/0_stateless/02015_system_views.reference deleted file mode 100644 index a1b1b2a9fd3..00000000000 --- a/tests/queries/0_stateless/02015_system_views.reference +++ /dev/null @@ -1 +0,0 @@ -02015_db materialized_view 02015_db view_source_tb Materialized diff --git a/tests/queries/0_stateless/02015_system_views.sql b/tests/queries/0_stateless/02015_system_views.sql deleted file mode 100644 index a6375dcb591..00000000000 --- a/tests/queries/0_stateless/02015_system_views.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP DATABASE IF EXISTS 02015_db; -CREATE DATABASE IF NOT EXISTS 02015_db; - -DROP TABLE IF EXISTS 02015_db.view_source_tb; -CREATE TABLE IF NOT EXISTS 02015_db.view_source_tb (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; - -DROP TABLE IF EXISTS 02015_db.materialized_view; -CREATE MATERIALIZED VIEW IF NOT EXISTS 02015_db.materialized_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM 02015_db.view_source_tb; - -SELECT * FROM system.views WHERE database='02015_db' and name = 'materialized_view'; - -DROP TABLE IF EXISTS 02015_db.materialized_view; -DROP TABLE IF EXISTS 02015_db.view_source_tb; -DROP DATABASE IF EXISTS 02015_db; diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 0143cc78dbe..335ed370b9b 100644 
--- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -512,7 +512,6 @@ "01532_execute_merges_on_single_replica", /// static zk path "01530_drop_database_atomic_sync", /// creates database "02001_add_default_database_to_system_users", ///create user - "02002_row_level_filter_bug", ///create user - "02015_system_views" + "02002_row_level_filter_bug" ///create user ] } From 3f1e904722530680bea03964998807aed8a78959 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 6 Sep 2021 23:31:34 +0300 Subject: [PATCH 108/125] Second iteration --- docs/en/interfaces/formats.md | 6 ++++++ docs/en/operations/settings/settings.md | 6 +++--- .../data-types/nested-data-structures/nested.md | 8 +++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index bac67ecf140..d28045ce213 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1270,6 +1270,8 @@ You can insert Parquet data from a file into ClickHouse table by the following c $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` +To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). + You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command: ``` bash @@ -1328,6 +1330,8 @@ You can insert Arrow data from a file into ClickHouse table by the following com $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` +To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). + ### Selecting Data {#selecting-data-arrow} You can select data from a ClickHouse table and save them into some file in the Arrow format by the following command: @@ -1384,6 +1388,8 @@ You can insert ORC data from a file into ClickHouse table by the following comma $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` +To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). + ### Selecting Data {#selecting-data-2} You can select data from a ClickHouse table and save them into some file in the ORC format by the following command: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 221bdf0e720..e8355eeaa1d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -262,7 +262,7 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar ## input_format_parquet_import_nested {#input_format_parquet_import_nested} -Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [Parquet](../../interfaces/formats.md#data-format-parquet) input format as an array of structs. 
+Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Parquet](../../interfaces/formats.md#data-format-parquet) input format. Possible values: @@ -273,7 +273,7 @@ Default value: `0`. ## input_format_arrow_import_nested {#input_format_arrow_import_nested} -Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format as an array of structs. +Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format. Possible values: @@ -284,7 +284,7 @@ Default value: `0`. ## input_format_orc_import_nested {#input_format_orc_import_nested} -Enables or disables the ability to insert [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) structures into [ORC](../../interfaces/formats.md#data-format-orc) input format as an array of structs. +Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [ORC](../../interfaces/formats.md#data-format-orc) input format. Possible values: diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md index ec6c613a956..281465f2075 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/nested.md @@ -3,7 +3,9 @@ toc_priority: 57 toc_title: Nested(Name1 Type1, Name2 Type2, ...) --- -# Nested(name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested {#nested} + +## Nested(name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. @@ -101,4 +103,8 @@ For a DESCRIBE query, the columns in a nested data structure are listed separate The ALTER query for elements in a nested data structure has limitations. +## Inserting {#inserting} + +It is possible to insert `Array(Struct)` columns into nested columns with the settings [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) and [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) in [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) and [ORC](../../../interfaces/formats.md#data-format-orc) input formats correspondingly. 
+ [Original article](https://clickhouse.tech/docs/en/data_types/nested_data_structures/nested/) From dbdb3356ba4f0d3c02a3d8ff2da0063509a4f596 Mon Sep 17 00:00:00 2001 From: George Date: Mon, 6 Sep 2021 23:34:43 +0300 Subject: [PATCH 109/125] fixed links --- docs/en/interfaces/formats.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d28045ce213..a83ee999070 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1270,7 +1270,7 @@ You can insert Parquet data from a file into ClickHouse table by the following c $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). +To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command: @@ -1330,7 +1330,7 @@ You can insert Arrow data from a file into ClickHouse table by the following com $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). +To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). ### Selecting Data {#selecting-data-arrow} @@ -1388,7 +1388,7 @@ You can insert ORC data from a file into ClickHouse table by the following comma $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -To insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). +To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). ### Selecting Data {#selecting-data-2} From cbc10cd3c0a5b266ec261edbf916307f6e61042f Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Sep 2021 21:11:21 +0000 Subject: [PATCH 110/125] Revert "Merge pull request #28397 from zhongyuankai/DOCSUP-13927-document-system_views" This reverts commit d6f89fd9cfbdb39da4be8248007d68df0f09e6d1, reversing changes made to 68f6ecec62b60ec9826da459663e4da0e13d7bfb. 
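As a companion to the `input_format_*_import_nested` documentation added in [PATCH 108/125] above, a hedged sketch of the Parquet variant; the table definition, database name and file name are illustrative assumptions, not taken from the patches.

```sql
-- Illustration only: a target table with a Nested column.
CREATE TABLE test.events
(
    `id` UInt64,
    `tags` Nested(key String, value String)
)
ENGINE = MergeTree
ORDER BY id;

-- Enable the mapping in the session that runs the INSERT, or pass the same
-- setting as a clickhouse-client flag when piping the file in:
SET input_format_parquet_import_nested = 1;
-- cat events.parquet | clickhouse-client --input_format_parquet_import_nested=1 \
--     --query="INSERT INTO test.events FORMAT Parquet"
```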
--- docs/en/operations/system-tables/views.md | 44 -----------------
 1 file changed, 44 deletions(-)
 delete mode 100644 docs/en/operations/system-tables/views.md

diff --git a/docs/en/operations/system-tables/views.md b/docs/en/operations/system-tables/views.md
deleted file mode 100644
index 8edebf00a91..00000000000
--- a/docs/en/operations/system-tables/views.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# system.views {#system-views}
-
-Contains the dependencies of all views and the type to which the view belongs. The metadata of the view comes from the [system.tables](tables.md).
-
-Columns:
-
-- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the view is in.
-
-- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the view.
-
-- `main_dependency_database` ([String](../../sql-reference/data-types/string.md)) — The name of the database on which the view depends.
-
-- `main_dependency_table` ([String](../../sql-reference/data-types/string.md)) - The name of the table on which the view depends.
-
-- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values:
-    - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log.
-    - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized).
-    - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view).
-
-**Example**
-
-```sql
-SELECT * FROM system.views LIMIT 2 FORMAT Vertical;
-```
-
-```text
-Row 1:
-──────
-database:                 default
-name:                     live_view
-main_dependency_database: default
-main_dependency_table:    view_source_tb
-view_type:                Live
-
-Row 2:
-──────
-database:                 default
-name:                     materialized_view
-main_dependency_database: default
-main_dependency_table:    view_source_tb
-view_type:                Materialized
-```
-
-[Original article](https://clickhouse.tech/docs/en/operations/system-tables/views)

From 503b7a59f07f5da9d8cad7a6196be1939582e2f2 Mon Sep 17 00:00:00 2001
From: bharatnc
Date: Mon, 6 Sep 2021 09:54:59 -0700
Subject: [PATCH 111/125] fix getNumberOfArguments() for s2Rect functions

This fixes the value returned by getNumberOfArguments() for the s2RectAdd and s2RectContains functions.

Only 3 arguments are used by these functions, not 4:

- low s2Point of rectangle
- high s2Point of rectangle
- given s2Point

For s2RectAdd, the given s2Point is used to grow the bounding rectangle so that it includes that point. For s2RectContains, the function determines whether the bounded rectangle contains the given point.

PS: I wonder if it's more apt to call rectAdd rectGrow, since it's used to grow the size of a given rectangle.
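To make the three-argument call shape concrete, a hedged usage sketch for the two functions touched by this patch; the numeric values are arbitrary S2 cell identifiers chosen for illustration and the column aliases are made up.

```sql
-- Illustration only: rectangle low point, rectangle high point, extra point.
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS grown_rect;
SELECT s2RectContains(5178914411069187297, 5177056748191934217, 5177914411069187297) AS rect_contains;
```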
--- src/Functions/s2RectAdd.cpp | 2 +- src/Functions/s2RectContains.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index 90578567da2..d9b12ce22a3 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -41,7 +41,7 @@ public: return name; } - size_t getNumberOfArguments() const override { return 4; } + size_t getNumberOfArguments() const override { return 3; } bool useDefaultImplementationForConstants() const override { return true; } diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index 5f556c3ec14..27fed9e2031 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -41,7 +41,7 @@ public: return name; } - size_t getNumberOfArguments() const override { return 4; } + size_t getNumberOfArguments() const override { return 3; } bool useDefaultImplementationForConstants() const override { return true; } From bcc31f1f3e9616940eb8e3ddd3a51b89d368c734 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 6 Sep 2021 22:42:32 +0300 Subject: [PATCH 112/125] Remove unnecessary changes. --- programs/server/Server.cpp | 3 --- src/Access/AccessControlManager.h | 2 -- src/Access/SettingsProfilesCache.cpp | 1 + src/Core/MySQL/Authentication.cpp | 2 -- src/Core/MySQL/MySQLSession.h | 19 ------------------- src/Core/PostgreSQLProtocol.h | 3 +-- src/Interpreters/Context.cpp | 18 ++---------------- src/Interpreters/Context.h | 12 ------------ src/Interpreters/InterpreterSetQuery.cpp | 5 ----- src/Interpreters/ya.make | 1 - .../Formats/Impl/MySQLOutputFormat.h | 2 -- src/Server/HTTPHandler.h | 3 +-- src/TableFunctions/TableFunctionMySQL.cpp | 3 +-- .../01702_system_query_log.reference | 2 +- 14 files changed, 7 insertions(+), 69 deletions(-) delete mode 100644 src/Core/MySQL/MySQLSession.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index bf4e2f947dc..6a19fc9e036 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -48,11 +48,8 @@ #include #include #include -#include #include -#include #include -#include #include #include #include diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index c20216a291a..79f7073ef69 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -153,8 +153,6 @@ public: const ExternalAuthenticators & getExternalAuthenticators() const; - String getProfileName(const UUID & profile_id) const; - private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp index 20880b94aba..3cd73720c3e 100644 --- a/src/Access/SettingsProfilesCache.cpp +++ b/src/Access/SettingsProfilesCache.cpp @@ -116,6 +116,7 @@ void SettingsProfilesCache::mergeSettingsAndConstraints() } } + void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & enabled) const { SettingsProfileElements merged_settings; diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index 0eb080892c1..aeb9a411082 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -2,8 +2,6 @@ #include #include #include -#include -#include #include #include diff --git a/src/Core/MySQL/MySQLSession.h b/src/Core/MySQL/MySQLSession.h deleted file mode 100644 index 1ba17a40483..00000000000 --- a/src/Core/MySQL/MySQLSession.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - 
-class MySQLSession : public DB::Session -{ -public: - using DB::Session::Session; - - uint8_t sequence_id = 0; - uint32_t client_capabilities = 0; - size_t max_packet_size = 0; -}; - -} diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index f0de4bbb843..2b92258394e 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -1,12 +1,11 @@ #pragma once #include -#include -#include #include #include #include #include +#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index da7228ebd0f..c4fb89067e2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -626,20 +626,14 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -std::shared_ptr Context::getContextAccessForUser(const UUID & user_id) const -{ - return getAccessControlManager().getContextAccess( - user_id, /* current_roles = */ {}, /* use_default_roles = */ true, - settings, current_database, client_info); -} - void Context::setUser(const UUID & user_id_) { auto lock = getLock(); user_id = user_id_; - access = getContextAccessForUser(user_id_); + access = getAccessControlManager().getContextAccess( + user_id_, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info); auto user = access->getUser(); current_roles = std::make_shared>(user->granted_roles.findGranted(user->default_roles)); @@ -1271,14 +1265,6 @@ ContextMutablePtr Context::getBufferContext() const return buffer_context; } -Session * Context::getSessionOrNull() const -{ - if (hasSessionContext()) - return getSession(); - else - return nullptr; -} - const EmbeddedDictionaries & Context::getEmbeddedDictionaries() const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3e373c493f5..fbf64908f84 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -276,8 +276,6 @@ private: /// XXX: move this stuff to shared part instead. ContextMutablePtr buffer_context; /// Buffer context. Could be equal to this. - /// Non-owning, only here for MySQLOutputFormat to be able to modify sequence_id, see setSession() and getSession() - Session * session = nullptr; /// A flag, used to distinguish between user query and internal query to a database engine (MaterializePostgreSQL). bool is_internal_query = false; @@ -373,8 +371,6 @@ public: /// Normally you shouldn't call this function. Use the Session class to do authentication instead. 
void setUser(const UUID & user_id_); - std::shared_ptr getContextAccessForUser(const UUID & user_id) const; - UserPtr getUser() const; String getUserName() const; std::optional getUserID() const; @@ -604,14 +600,6 @@ public: ContextMutablePtr getGlobalContext() const; - // Exists only due to MySQLOutputFormat - Session * getSession() const { return getSessionContext()->session; } - void setSession(Session * new_session) - { - session = getSessionContext()->session = new_session; - } - Session * getSessionOrNull() const; - bool hasGlobalContext() const { return !global_context.expired(); } bool isGlobalContext() const { diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 73af2bbe3c0..1c6a4236bf6 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -9,13 +9,8 @@ namespace DB BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as(); - getContext()->checkSettingsConstraints(ast.changes); - // Here settings are pushed to the session context and are not visible in the query context getContext()->getSessionContext()->applySettingsChanges(ast.changes); - // Make setting changes also available to the query context. - getContext()->applySettingsChanges(ast.changes); - return {}; } diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 9263435e003..0bc7cb11cf0 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -148,7 +148,6 @@ SRCS( RowRefs.cpp SelectIntersectExceptQueryVisitor.cpp Session.cpp - Session.cpp SessionLog.cpp Set.cpp SetVariants.cpp diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.h b/src/Processors/Formats/Impl/MySQLOutputFormat.h index a285a6d75f3..a8e1ada3d6a 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.h +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.h @@ -14,7 +14,6 @@ class IColumn; class IDataType; class WriteBuffer; struct FormatSettings; -class MySQLSession; /** A stream for outputting data in a binary line-by-line format. */ @@ -35,7 +34,6 @@ public: private: void initialize(); -private: bool initialized = false; uint32_t client_capabilities = 0; uint8_t * sequence_id = nullptr; diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index f52958a191d..98f573f8cef 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -83,13 +83,12 @@ private: // The request_credential instance may outlive a single request/response loop. // This happens only when the authentication mechanism requires more than a single request/response exchange (e.g., SPNEGO). - std::shared_ptr request_session; std::unique_ptr request_credentials; // Returns true when the user successfully authenticated, // the session instance will be configured accordingly, and the request_credentials instance will be dropped. // Returns false when the user is not authenticated yet, and the 'Negotiate' response is sent, - // the request_session and request_credentials instances are preserved. + // the session and request_credentials instances are preserved. // Throws an exception if authentication failed. 
bool authenticateUser( HTTPServerRequest & request, diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 92387b13d55..09f9cf8b1f5 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -61,9 +61,8 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr user_name = args[3]->as().value.safeGet(); password = args[4]->as().value.safeGet(); - const auto & settings = context->getSettingsRef(); /// Split into replicas if needed. 3306 is the default MySQL port number - const size_t max_addresses = settings.glob_expansion_max_elements; + size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements; auto addresses = parseRemoteDescriptionForExternalDatabase(host_port, max_addresses, 3306); pool.emplace(remote_database_name, addresses, user_name, password); diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index 3458c2e5ed4..1f329feac22 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -8,7 +8,6 @@ GRANT queries REVOKE queries Misc queries ACTUAL LOG CONTENT: - -- fire all kinds of queries and then check if those are present in the system.query_log\nSET log_comment=\'system.query_log logging test\'; Select SELECT \'DROP queries and also a cleanup before the test\'; Drop DROP DATABASE IF EXISTS sqllt SYNC; DROP USER IF EXISTS sqllt_user; @@ -83,4 +82,5 @@ Rename RENAME TABLE sqllt.table TO sqllt.table_new; Rename RENAME TABLE sqllt.table_new TO sqllt.table; Drop TRUNCATE TABLE sqllt.table; Drop DROP TABLE sqllt.table SYNC; + SET log_comment=\'\'; DROP queries and also a cleanup after the test From 81d3e330870b4a8491cee44a54a45d58cf5da4a4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 7 Sep 2021 10:36:39 +0300 Subject: [PATCH 113/125] Add crashing test --- ...02015_shard_crash_clang_12_build.reference | 1 + .../02015_shard_crash_clang_12_build.sh | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/02015_shard_crash_clang_12_build.reference create mode 100755 tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh diff --git a/tests/queries/0_stateless/02015_shard_crash_clang_12_build.reference b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh new file mode 100755 index 00000000000..287a9b45f4c --- /dev/null +++ b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS local" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS distributed" + +$CLICKHOUSE_CLIENT --query "CREATE TABLE local (x UInt8) ENGINE = Memory;" +$CLICKHOUSE_CLIENT --query "CREATE TABLE distributed AS local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), local, x);" + +$CLICKHOUSE_CLIENT --insert_distributed_sync=0 --network_compression_method='zstd' --query "INSERT INTO distributed SELECT number FROM numbers(256);" +$CLICKHOUSE_CLIENT --insert_distributed_sync=0 --network_compression_method='zstd' --query "SYSTEM FLUSH DISTRIBUTED distributed;" + +function select_thread() +{ + while true; do + $CLICKHOUSE_CLIENT --insert_distributed_sync=0 --network_compression_method='zstd' --query "SELECT count() FROM local" >/dev/null + $CLICKHOUSE_CLIENT --insert_distributed_sync=0 --network_compression_method='zstd' --query "SELECT count() FROM distributed" >/dev/null + done +} + +export -f select_thread; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & +timeout $TIMEOUT bash -c select_thread 2> /dev/null & + +wait + +$CLICKHOUSE_CLIENT --query "SELECT 1" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS local" +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS distributed" From 5ff99314b1590d1a1fbf7d6a8bdbc27618dcb77b Mon Sep 17 00:00:00 2001 From: Anton Ivashkin Date: Tue, 7 Sep 2021 12:51:00 +0300 Subject: [PATCH 114/125] Reduce default settings for S3 multipart upload part size --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 887964bb233..09dfd347423 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -70,8 +70,8 @@ class IColumn; M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(UInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \ - M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ - M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ + M(UInt64, s3_min_upload_part_size, 32*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ From 978dd19fa242469bbfac1e3701c23d59dd291b05 Mon Sep 17 00:00:00 2001 From: ZhiYong Wang Date: Tue, 
7 Sep 2021 19:05:26 +0800 Subject: [PATCH 115/125] Fix coredump in creating distributed table --- src/Storages/StorageDistributed.cpp | 7 ++++++- ...02017_create_distributed_table_coredump.reference | 0 .../02017_create_distributed_table_coredump.sql | 12 ++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02017_create_distributed_table_coredump.reference create mode 100644 tests/queries/0_stateless/02017_create_distributed_table_coredump.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index df7d568deb9..1ad80f8aea6 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1332,7 +1332,12 @@ void registerStorageDistributed(StorageFactory & factory) String remote_table = engine_args[2]->as().value.safeGet(); const auto & sharding_key = engine_args.size() >= 4 ? engine_args[3] : nullptr; - const auto & storage_policy = engine_args.size() >= 5 ? engine_args[4]->as().value.safeGet() : "default"; + String storage_policy = "default"; + if (engine_args.size() >= 5) + { + engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], local_context); + storage_policy = engine_args[4]->as().value.safeGet(); + } /// Check that sharding_key exists in the table and has numeric type. if (sharding_key) diff --git a/tests/queries/0_stateless/02017_create_distributed_table_coredump.reference b/tests/queries/0_stateless/02017_create_distributed_table_coredump.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql b/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql new file mode 100644 index 00000000000..27c98c3e237 --- /dev/null +++ b/tests/queries/0_stateless/02017_create_distributed_table_coredump.sql @@ -0,0 +1,12 @@ +drop table if exists t; +drop table if exists td1; +drop table if exists td2; +drop table if exists td3; +create table t (val UInt32) engine = MergeTree order by val; +create table td1 engine = Distributed(test_shard_localhost, currentDatabase(), 't') as t; +create table td2 engine = Distributed(test_shard_localhost, currentDatabase(), 't', xxHash32(val), default) as t; +create table td3 engine = Distributed(test_shard_localhost, currentDatabase(), 't', xxHash32(val), 'default') as t; +drop table if exists t; +drop table if exists td1; +drop table if exists td2; +drop table if exists td3; From 69604eab3f72fc8c6e14b387a73dd4166319d680 Mon Sep 17 00:00:00 2001 From: Vitaly Date: Tue, 7 Sep 2021 14:05:55 +0300 Subject: [PATCH 116/125] Add Settings.Names, Settings.Values aliases for system.processes table --- src/Storages/System/StorageSystemProcesses.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index e2685af7718..5e6ba37226c 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -73,7 +73,9 @@ NamesAndAliases StorageSystemProcesses::getNamesAndAliases() return { {"ProfileEvents.Names", {std::make_shared(std::make_shared())}, "mapKeys(ProfileEvents)"}, - {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"} + {"ProfileEvents.Values", {std::make_shared(std::make_shared())}, "mapValues(ProfileEvents)"}, + {"Settings.Names", {std::make_shared(std::make_shared())}, "mapKeys(Settings)" }, + {"Settings.Values", 
{std::make_shared(std::make_shared())}, "mapValues(Settings)"} }; } From d9ca1e29c3eac0ebb4c8da8a449d0a514a61a2b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 7 Sep 2021 15:56:32 +0300 Subject: [PATCH 117/125] Increase stack size for coroutines --- src/Common/FiberStack.h | 9 ++++++++- .../0_stateless/02015_shard_crash_clang_12_build.sh | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index c509540cc9e..aec4befb515 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -27,7 +27,12 @@ private: size_t stack_size; size_t page_size = 0; public: - static constexpr size_t default_stack_size = 128 * 1024; /// 64KB was not enough for tests + /// NOTE: If you see random segfaults in CI and stack starts from boost::context::...fiber... + /// probably it worth to try to increase stack size for coroutines. + /// + /// Current value is just enough for all tests in our CI. It's not selected in some special + /// way. We will have 36 pages with 4KB page size. + static constexpr size_t default_stack_size = 144 * 1024; /// 64KB was not enough for tests explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) { @@ -43,6 +48,8 @@ public: if (MAP_FAILED == vp) DB::throwFromErrno(fmt::format("FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /// TODO: make reports on illegal guard page access more clear. + /// Currently we will see segfault and almost random stacktrace. if (-1 == ::mprotect(vp, page_size, PROT_NONE)) { ::munmap(vp, num_bytes); diff --git a/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh index 287a9b45f4c..f6ede6592ff 100755 --- a/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh +++ b/tests/queries/0_stateless/02015_shard_crash_clang_12_build.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# This test reproduces crash in case of insufficient coroutines stack size + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From 36f57555f68fb8088d7ee553f6801d1e77f50c2e Mon Sep 17 00:00:00 2001 From: George Date: Tue, 7 Sep 2021 19:50:07 +0300 Subject: [PATCH 118/125] Edited and translated to Russian --- .../external-dicts-dict-layout.md | 5 +++-- .../table-engines/integrations/mongodb.md | 16 +++++++++++++-- docs/ru/operations/settings/settings-users.md | 2 +- .../external-dicts-dict-layout.md | 20 ++++++++++++++++--- .../sql-reference/statements/create/user.md | 1 + 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 00a9610ce91..eb8ca425d24 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -300,8 +300,9 @@ When searching for a dictionary, the cache is searched first. For each block of If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. 
-For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired, and it is re-requested the next time it needs to be used this behaviour can be configured with setting `allow_read_expired_keys`. -This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. +For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. + +This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. If setting `allow_read_expired_keys` is set to 1, by default 0. Then dictionary can support asynchronous updates. If a client requests keys and all of them are in cache, but some of them are expired, then dictionary will return expired keys for a client and request them asynchronously from the source. diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md index 05820d03fe6..66c30250926 100644 --- a/docs/ru/engines/table-engines/integrations/mongodb.md +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = MongoDB(host:port, database, collection, user, password); +) ENGINE = MongoDB(host:port, database, collection, user, password [, options]); ``` **Параметры движка** @@ -30,9 +30,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `password` — пароль пользователя. +- `options` — MongoDB connection string options (optional parameter). 
+ ## Примеры использования {#usage-example} -Таблица в ClickHouse для чтения данных из колекции MongoDB: +Создание в ClickHouse для чтения данных из колекции MongoDB: ``` text CREATE TABLE mongo_table @@ -42,6 +44,16 @@ CREATE TABLE mongo_table ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); ``` +Чтение из сервера MongoDB, защищенного SSL: + +``` text +CREATE TABLE mongo_table_ssl +( + key UInt64, + data String +) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true'); +``` + Запрос к таблице: ``` sql diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 6a10e518817..4570ce38bad 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -28,7 +28,7 @@ toc_title: "Настройки пользователей" profile_name default - + default diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 06fe4ae327a..b61c2cbcbd7 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -297,9 +297,13 @@ RANGE(MIN StartDate MAX EndDate); При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Для cache-словарей может быть задано время устаревания [lifetime](external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. +Если ключи не были найдены в словаре, то создается задание для обновления кэша, которое добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` -Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. +Для cache-словарей может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. Это можно настроить с помощью `allow_read_expired_keys`. + +Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). + +Если параметр `allow_read_expired_keys` выставлен на 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. 
Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. @@ -312,6 +316,16 @@ RANGE(MIN StartDate MAX EndDate); 1000000000 + + 0 + + 100000 + + 10 + + 60000 + + 4 ``` @@ -338,7 +352,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ### ssd_cache {#ssd-cache} -Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. +Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. ``` xml diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 22efaa71bfc..f6248d97ba9 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] + [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] ``` From b984caaab7145f4cd2239eb7de2b0d7052b43da7 Mon Sep 17 00:00:00 2001 From: George Date: Tue, 7 Sep 2021 19:55:13 +0300 Subject: [PATCH 119/125] Revert "Edited and translated to Russian" This reverts commit 36f57555f68fb8088d7ee553f6801d1e77f50c2e. --- .../external-dicts-dict-layout.md | 5 ++--- .../table-engines/integrations/mongodb.md | 16 ++------------- docs/ru/operations/settings/settings-users.md | 2 +- .../external-dicts-dict-layout.md | 20 +++---------------- .../sql-reference/statements/create/user.md | 1 - 5 files changed, 8 insertions(+), 36 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index eb8ca425d24..00a9610ce91 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -300,9 +300,8 @@ When searching for a dictionary, the cache is searched first. For each block of If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. -For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. 
- -This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. +For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired, and it is re-requested the next time it needs to be used this behaviour can be configured with setting `allow_read_expired_keys`. +This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. If setting `allow_read_expired_keys` is set to 1, by default 0. Then dictionary can support asynchronous updates. If a client requests keys and all of them are in cache, but some of them are expired, then dictionary will return expired keys for a client and request them asynchronously from the source. diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md index 66c30250926..05820d03fe6 100644 --- a/docs/ru/engines/table-engines/integrations/mongodb.md +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = MongoDB(host:port, database, collection, user, password [, options]); +) ENGINE = MongoDB(host:port, database, collection, user, password); ``` **Параметры движка** @@ -30,11 +30,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `password` — пароль пользователя. -- `options` — MongoDB connection string options (optional parameter). 
- ## Примеры использования {#usage-example} -Создание в ClickHouse для чтения данных из колекции MongoDB: +Таблица в ClickHouse для чтения данных из колекции MongoDB: ``` text CREATE TABLE mongo_table @@ -44,16 +42,6 @@ CREATE TABLE mongo_table ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); ``` -Чтение из сервера MongoDB, защищенного SSL: - -``` text -CREATE TABLE mongo_table_ssl -( - key UInt64, - data String -) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true'); -``` - Запрос к таблице: ``` sql diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 4570ce38bad..6a10e518817 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -28,7 +28,7 @@ toc_title: "Настройки пользователей" profile_name default - default + diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index b61c2cbcbd7..06fe4ae327a 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -297,13 +297,9 @@ RANGE(MIN StartDate MAX EndDate); При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Если ключи не были найдены в словаре, то создается задание для обновления кэша, которое добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` +Для cache-словарей может быть задано время устаревания [lifetime](external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. -Для cache-словарей может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. Это можно настроить с помощью `allow_read_expired_keys`. - -Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). - -Если параметр `allow_read_expired_keys` выставлен на 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. +Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. 
Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. @@ -316,16 +312,6 @@ RANGE(MIN StartDate MAX EndDate); 1000000000 - - 0 - - 100000 - - 10 - - 60000 - - 4 ``` @@ -352,7 +338,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ### ssd_cache {#ssd-cache} -Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. +Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. ``` xml diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index f6248d97ba9..22efaa71bfc 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -15,7 +15,6 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] - [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] ``` From 5e133a3cc638b5e16fee5529c258553e6b43d90d Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 8 Sep 2021 07:58:22 +0300 Subject: [PATCH 120/125] Run generate-ya-make.sh --- src/Storages/ya.make | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 5b246cf5aca..11a1ad212c1 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -214,7 +214,6 @@ SRCS( System/StorageSystemTables.cpp System/StorageSystemUserDirectories.cpp System/StorageSystemUsers.cpp - System/StorageSystemViews.cpp System/StorageSystemWarnings.cpp System/StorageSystemZeros.cpp System/StorageSystemZooKeeper.cpp From 5588853893bd1c96f7a96de75f8df027b93bed13 Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Wed, 8 Sep 2021 13:25:06 +0300 Subject: [PATCH 121/125] Apply suggestions from code review Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/interfaces/formats.md | 6 +++--- docs/en/operations/settings/settings.md | 18 +++++++++--------- .../nested-data-structures/nested.md | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a83ee999070..129a50bb7fc 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1270,7 +1270,7 @@ You can insert Parquet data from a file into ClickHouse table by the following c $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). 
+To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) setting. You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command: @@ -1330,7 +1330,7 @@ You can insert Arrow data from a file into ClickHouse table by the following com $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). +To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) setting. ### Selecting Data {#selecting-data-arrow} @@ -1388,7 +1388,7 @@ You can insert ORC data from a file into ClickHouse table by the following comma $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -To insert `Array(Struct)` columns into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns use the setting [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). +To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested) setting. ### Selecting Data {#selecting-data-2} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e8355eeaa1d..9c54f925613 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -262,34 +262,34 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar ## input_format_parquet_import_nested {#input_format_parquet_import_nested} -Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Parquet](../../interfaces/formats.md#data-format-parquet) input format. +Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Parquet](../../interfaces/formats.md#data-format-parquet) input format. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. +- 1 — `Array(Struct)` values can be inserted into `Nested` columns. Default value: `0`. ## input_format_arrow_import_nested {#input_format_arrow_import_nested} -Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format. +Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format. Possible values: -- 0 — Disabled. -- 1 — Enabled. 
+- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. +- 1 — `Array(Struct)` values can be inserted into `Nested` columns. Default value: `0`. ## input_format_orc_import_nested {#input_format_orc_import_nested} -Enables or disables the ability to insert `Array(Struct)` columns into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [ORC](../../interfaces/formats.md#data-format-orc) input format. +Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [ORC](../../interfaces/formats.md#data-format-orc) input format. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. +- 1 — `Array(Struct)` values can be inserted into `Nested` columns. Default value: `0`. diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md index 281465f2075..0c53248baf9 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/nested.md @@ -105,6 +105,6 @@ The ALTER query for elements in a nested data structure has limitations. ## Inserting {#inserting} -It is possible to insert `Array(Struct)` columns into nested columns with the settings [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) and [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) in [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) and [ORC](../../../interfaces/formats.md#data-format-orc) input formats correspondingly. +To insert `Array(Struct)` values into `Nested` columns, you must switch on the [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) or [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) settings for [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) and [ORC](../../../interfaces/formats.md#data-format-orc) input formats respectively. [Original article](https://clickhouse.tech/docs/en/data_types/nested_data_structures/nested/) From 3a949289118ba4b56e8007f3fc01f28cebbb657f Mon Sep 17 00:00:00 2001 From: George Date: Wed, 8 Sep 2021 13:40:43 +0300 Subject: [PATCH 122/125] translated to Russian --- docs/ru/interfaces/formats.md | 12 +++++-- docs/ru/operations/settings/settings.md | 33 +++++++++++++++++++ .../nested-data-structures/nested.md | 7 +++- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 1dd10e1f76d..93be1736ef7 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1180,7 +1180,7 @@ ClickHouse поддерживает настраиваемую точность Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. 
При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы. -### Вставка и выборка данных {#vstavka-i-vyborka-dannykh} +### Вставка и выборка данных {#inserting-and-selecting-data} Чтобы вставить в ClickHouse данные из файла в формате Parquet, выполните команду следующего вида: @@ -1188,6 +1188,8 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` +Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). + Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1246,6 +1248,8 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` +Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). + ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1294,7 +1298,7 @@ ClickHouse поддерживает настраиваемую точность Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse. -### Вставка данных {#vstavka-dannykh-1} +### Вставка данных {#inserting-data-2} Чтобы вставить в ClickHouse данные из файла в формате ORC, используйте команду следующего вида: @@ -1302,7 +1306,9 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -### Вывод данных {#vyvod-dannykh-1} +Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). + +### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 56b04e81a94..d69c7fddb30 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -237,6 +237,39 @@ ClickHouse применяет настройку в тех случаях, ко В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение. +## input_format_parquet_import_nested {#input_format_parquet_import_nested} + +Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). 
+ +Возможные значения: + +- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. +- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. + +Значение по умолчанию: `0`. + +## input_format_arrow_import_nested {#input_format_arrow_import_nested} + +Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). + +Возможные значения: + +- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. +- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. + +Значение по умолчанию: `0`. + +## input_format_orc_import_nested {#input_format_orc_import_nested} + +Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). + +Возможные значения: + +- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. +- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. + +Значение по умолчанию: `0`. + ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md). diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 718fe77ae95..8d48cd55713 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -1,4 +1,6 @@ -# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested {#nested} + +## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных. @@ -96,3 +98,6 @@ LIMIT 10 Работоспособность запроса ALTER для элементов вложенных структур данных, является сильно ограниченной. +## Inserting {#inserting} + +Чтобы вставить данные типа `Array(Struct)` в колонки `Nested`, нужно включить настройку [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) или [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) для форматов ввода [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) и [ORC](../../../interfaces/formats.md#data-format-orc) соответственно. 
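The three `input_format_*_import_nested` settings documented in the patches above are easiest to picture next to a concrete table. Below is a minimal sketch; the table and column names (`test_nested_import`, `id`, `items`) are illustrative assumptions and do not come from the patches, only the setting name does.

```sql
-- Sketch: a table with a Nested column that a Parquet/Arrow/ORC file containing
-- a list-of-structs column could be loaded into (names are illustrative).
CREATE TABLE test_nested_import
(
    id    UInt64,
    items Nested(name String, value UInt32)
)
ENGINE = MergeTree
ORDER BY id;

-- The new settings default to 0. Enabling the matching one lets the array of
-- structs from the input file be mapped onto items.name and items.value.
SET input_format_parquet_import_nested = 1;

-- The load itself would go through the usual client command shown in the docs
-- above, for example:
--   clickhouse-client --query="INSERT INTO test_nested_import FORMAT Parquet" < data.parquet
```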
From 5ab0f3ac28ae1b6ad4ec0caacb3a5edd8acaf679 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 8 Sep 2021 19:38:59 +0300 Subject: [PATCH 123/125] Updated docs --- docs/en/interfaces/formats.md | 6 +++--- .../data-types/nested-data-structures/nested.md | 4 ---- docs/ru/interfaces/formats.md | 6 +++--- .../data-types/nested-data-structures/nested.md | 4 ---- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 129a50bb7fc..68a52dd702e 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1270,7 +1270,7 @@ You can insert Parquet data from a file into ClickHouse table by the following c $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) setting. +To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs values you must switch on the [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) setting. You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command: @@ -1330,7 +1330,7 @@ You can insert Arrow data from a file into ClickHouse table by the following com $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) setting. +To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs values you must switch on the [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) setting. ### Selecting Data {#selecting-data-arrow} @@ -1388,7 +1388,7 @@ You can insert ORC data from a file into ClickHouse table by the following comma $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -To insert `Array(Struct)` values into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns you must switch on the [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested) setting. +To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs values you must switch on the [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested) setting. ### Selecting Data {#selecting-data-2} diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md index 0c53248baf9..65849f9cd0f 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/nested.md @@ -103,8 +103,4 @@ For a DESCRIBE query, the columns in a nested data structure are listed separate The ALTER query for elements in a nested data structure has limitations. 
-## Inserting {#inserting} - -To insert `Array(Struct)` values into `Nested` columns, you must switch on the [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) or [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) settings for [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) and [ORC](../../../interfaces/formats.md#data-format-orc) input formats respectively. - [Original article](https://clickhouse.tech/docs/en/data_types/nested_data_structures/nested/) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 93be1736ef7..5bd0f8db406 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1188,7 +1188,7 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). +Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: @@ -1248,7 +1248,7 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). +Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). ### Вывод данных {#selecting-data-arrow} @@ -1306,7 +1306,7 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные типа `Array(Struct)` в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). +Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). 
### Вывод данных {#selecting-data-2} diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 8d48cd55713..db957e57502 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -97,7 +97,3 @@ LIMIT 10 При запросе DESCRIBE, столбцы вложенной структуры данных перечисляются так же по отдельности. Работоспособность запроса ALTER для элементов вложенных структур данных, является сильно ограниченной. - -## Inserting {#inserting} - -Чтобы вставить данные типа `Array(Struct)` в колонки `Nested`, нужно включить настройку [input_format_parquet_import_nested](../../../operations/settings/settings.md#input_format_parquet_import_nested), [input_format_arrow_import_nested](../../../operations/settings/settings.md#input_format_arrow_import_nested) или [input_format_orc_import_nested](../../../operations/settings/settings.md#input_format_orc_import_nested) для форматов ввода [Parquet](../../../interfaces/formats.md#data-format-parquet), [Arrow](../../../interfaces/formats.md#data_types-matching-arrow) и [ORC](../../../interfaces/formats.md#data-format-orc) соответственно. From 43850b51d79927cee37fab7dd978433db48dac80 Mon Sep 17 00:00:00 2001 From: George Date: Wed, 8 Sep 2021 19:40:47 +0300 Subject: [PATCH 124/125] Small fix --- docs/ru/interfaces/formats.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 5bd0f8db406..970c6c36e9f 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -1188,7 +1188,7 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). +Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: @@ -1248,7 +1248,7 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). +Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). 
### Вывод данных {#selecting-data-arrow} @@ -1306,7 +1306,7 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставлять данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). +Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). ### Вывод данных {#selecting-data-2} From a24186734effcfbabd656540f43d056ca0bcbeca Mon Sep 17 00:00:00 2001 From: George Date: Wed, 8 Sep 2021 20:14:25 +0300 Subject: [PATCH 125/125] minor changes --- docs/en/operations/settings/settings.md | 18 +++++++++--------- docs/ru/operations/settings/settings.md | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 9c54f925613..d5758976457 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -262,34 +262,34 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar ## input_format_parquet_import_nested {#input_format_parquet_import_nested} -Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Parquet](../../interfaces/formats.md#data-format-parquet) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md#data-format-parquet) input format. Possible values: -- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. -- 1 — `Array(Struct)` values can be inserted into `Nested` columns. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. ## input_format_arrow_import_nested {#input_format_arrow_import_nested} -Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format. Possible values: -- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. -- 1 — `Array(Struct)` values can be inserted into `Nested` columns. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. ## input_format_orc_import_nested {#input_format_orc_import_nested} -Enables or disables the ability to insert `Array(Struct)` values into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns in [ORC](../../interfaces/formats.md#data-format-orc) input format. 
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md#data-format-orc) input format. Possible values: -- 0 — `Array(Struct)` values can not be inserted into `Nested` columns. -- 1 — `Array(Struct)` values can be inserted into `Nested` columns. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index d69c7fddb30..9ad300b8c9c 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -239,34 +239,34 @@ ClickHouse применяет настройку в тех случаях, ко ## input_format_parquet_import_nested {#input_format_parquet_import_nested} -Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). +Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). Возможные значения: -- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. -- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. +- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. +- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. Значение по умолчанию: `0`. ## input_format_arrow_import_nested {#input_format_arrow_import_nested} -Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). +Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). Возможные значения: -- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. -- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. +- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. +- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. Значение по умолчанию: `0`. ## input_format_orc_import_nested {#input_format_orc_import_nested} -Включает или отключает возможность вставки данных типа `Array(Struct)` в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). +Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). Возможные значения: -- 0 — данные типа `Array(Struct)` не могут быть вставлены в колонки `Nested`. -- 1 — данные типа `Array(Struct)` могут быть вставлены в колонки `Nested`. +- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. 
+- 1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. Значение по умолчанию: `0`.
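As a quick sanity check of the settings documented in the last few patches, one could query `system.settings` on a server built from this series. This is a sketch under that assumption; the setting names come from the diffs above, everything else is standard.

```sql
-- Sketch: confirm that the three settings documented above exist and still
-- have their default value of 0 until explicitly enabled.
SELECT name, value, changed
FROM system.settings
WHERE name IN ('input_format_parquet_import_nested',
               'input_format_arrow_import_nested',
               'input_format_orc_import_nested');
```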